#!/usr/bin/perl

use strict;
use warnings;

use Carp qw(cluck longmess shortmess croak confess);
#use File::Copy;
use File::Path;
use File::Slurper;
use Data::Dumper;
use List::Util qw(shuffle);
use Statistics::Descriptive;

use Ernad::Common;
use Ernad::Constant;
use Ernad::Erimp;
use Ernad::Learn::Eval;

## set this to a true value to have progress reported on STDOUT
my $verbose=0;
## issue size assumed when an evaluation file yields no mean issue size
my $default_issue_size=10;

## the impna is the only command line argument, it is handed to
## Ernad::Erimp to build the object that the subs below share
my $impna=$ARGV[0];
if(not defined($impna)) {
  die "I need the impna.";
}
my $e=Ernad::Erimp->new({'impna'=>$impna});
if(not $e) {
  die "You gave me an invalid impna $impna\n";
}

## the evaluation files, one per repcode, live in this directory
my $eval_dir=$e->{'dir'}->{'eval'};
if(not -d $eval_dir) {
  mkpath $eval_dir;
}

## the all_handle_cache: per issue date, the position of every text
## in the sent order of the allport report
my $a_h_c;

my $ernad_ns=$Ernad::Constant::c->{'ernad_ns'};
my $name='issue_evaluation';
## $xml_start is used as a pattern to strip the opening tag from an
## existing evaluation file. The namespace is therefore quoted so that
## it is matched literally, while any ponori_param value is accepted.
my $xml_start="<$name xmlns=\"\Q$ernad_ns\E\" ponori_param=\"[^\"]+\">\n";
my $xml_end="</$name>\n";

## the reports are worked on in random order
my @repcodes=shuffle $e->list_repcodes();

foreach my $repcode (@repcodes) {
  work_on_report($repcode);
}

## Bring the evaluation file of one report up to date.
##
## An existing $eval_dir/$repcode.xml is read and its opening and
## closing tags are removed. The mean number of <text> lines per
## <issue> found in it gives the ponori parameter 1+1/mean. get_eval()
## then adds the issues that are still missing. The file is only
## written when get_eval() changed the text.
##
## Parameter: the repcode of the report.
## Confesses when the repcode is missing, or when an existing file
## does not contain the expected opening tag.
sub work_on_report {
  my $repcode=shift // confess "I need a repcode here.";
  my $out_file="$eval_dir/$repcode.xml";
  my $done='';
  if(-f $out_file) {
    $done=File::Slurper::read_text($out_file);
    $done=~s|$xml_start|| or confess "I could not remove the xml_start $xml_start from $done";
    $done=~s|$xml_end||;
  }
  my $old_done=$done;
  ## count the <text> lines of every issue that is already evaluated
  my $count_texts=0;
  my $stat = Statistics::Descriptive::Full->new();
  foreach my $line (split("\n",$done)) {
    if($line=~m|<issue date=\"\d{4}-\d{2}-\d{2}\"|) {
      $count_texts=0;
    }
    elsif($line=~m|</issue>|) {
      $stat->add_data($count_texts);
    }
    elsif($line=~m|<text|) {
      $count_texts++;
    }
  }
  ## The mean is undefined when no issue has been evaluated yet, and
  ## it is 0 when all evaluated issues are empty. Neither can be used
  ## as a divisor, so the default issue size steps in.
  my $average_issue_size=$stat->mean();
  if(not defined($average_issue_size) or $average_issue_size<=0) {
    $average_issue_size=$default_issue_size;
  }
  my $rosa_y=1+1/$average_issue_size;
  my $full_xml_start="<$name xmlns=\"$ernad_ns\" ponori_param=\"$rosa_y\">\n";
  $done=get_eval($repcode,$done,$rosa_y);
  if($done ne $old_done) {
    ## remove any blank at the start
    $done=~s|^\s+||;
    $done="$full_xml_start$done$xml_end";
    File::Slurper::write_text($out_file,$done);
    if($verbose) {
      print "I wrote $out_file.\n";
    }
  }
  elsif($verbose) {
    print "I keep $out_file.\n";
  }
}

## Return the first whitespace-separated field of an order line, which
## is the identifier of the text. Leading whitespace is skipped and
## any further fields on the line are ignored. The result is undefined
## when the line holds no field at all.
sub get_id {
  my ($line)=@_;
  my @fields=split(' ',$line);
  return $fields[0];
}

### parse past lines to find average issue size
### (disabled: work_on_report does this parsing inline)
#sub parse_average_issue_size {
#  my $file=shift // confess "I need a file here.";
#  if(not -f $file) {
#    confess "I can't open the file $file.";
#  }
#  open(F,"< $file");
#  my $line;
#  my $count_textss;
#  my $stat = Statistics::Descriptive::Full->new();
#  while($line=<F>) {
#    if($line=~m|<issue date=\"(\d{4}-\d{2}-\d{2})\"||) {
#      $count_texts=0;
#    }
#    elsif($line=~m|</issue>|) {
#      $stat->add_date($count);
#    }
#    elsif($line=~m|<text||) {
#      $count++;
#    }
#  }
#  die $stat->mean();
#}


## Evaluate the issues of a report that are not evaluated yet.
##
## Parameters:
##   repcode  -- the report to look at
##   done     -- text of the <issue> elements evaluated so far, may be
##               empty
##   ponori_y -- handed on to gather_data_for_issue()
##
## The issue dates come from last_date_times() on the selected
## directory of the report. They are visited in sorted order and every
## new <issue> element is put in front of the text.
##
## Returns the text, which is unchanged when nothing could be added.
sub get_eval {
  my $repcode=shift;
  my $done=shift;
  my $ponori_y=shift;
  my $selected_dir=$e->{'report'}->{$repcode}->{'dir'}->{'selected'};
  my $issues=$e->{'d'}->last_date_times($selected_dir);
  foreach my $issuedate (sort keys %{$issues}) {
    ## An issue counts as done when its date appears in the text. The
    ## date is searched as a literal string, not as a pattern.
    if(length($done) and index($done,$issuedate)>=0) {
      if($verbose) {
        print "I skip $issuedate\n";
      }
      next;
    }
    if($verbose) {
      print "I have to do $issuedate\n";
    }
    ## issues that can not be evaluated are left out
    my $add=gather_data_for_issue($repcode,$issuedate,$ponori_y) or next;
    $done="$add$done";
  }
  return $done;
}

## Build the <issue> element that evaluates one issue of one report.
##
## The texts that the report selected are looked up in the presorted
## order of the report. Each of them gives a <text> line with its
## position in the presorted order (pos) and its position in the sent
## order of the allport report (allpos). The presorted positions,
## preceded by the number of texts in the allport issue, are also
## passed to Ernad::Learn::Eval::ponori() together with ponori_y.
##
## Parameters: repcode, issuedate, ponori_y.
##
## Returns the text of the element. Returns nothing when the sent
## order of allport, the selected order (or the ordered one that
## replaces it when everything was selected) or the presorted order
## can not be had.
##
## Confesses when ponori_y is below 1, and when a selected text is not
## part of the allport issue.
sub gather_data_for_issue {
  my $repcode=shift;
  my $issuedate=shift;
  my $ponori_y=shift;
  if($ponori_y<1) {
    confess "I need a ponori_y that's greater than 1";
  }
  ## fill the cache when this issue date is seen for the first time
  if(not defined($a_h_c->{$issuedate})) {
    my $sent_text=get_order_text($issuedate) or return;
    my $sent_pos=1;
    foreach my $line (split("\n",$sent_text)) {
      my $id=get_id($line);
      $a_h_c->{$issuedate}->{$id}=$sent_pos++;
    }
  }
  my $allport_total=scalar(keys %{$a_h_c->{$issuedate}});
  ## gather the arguments for the ponori
  my $eval_args=[$allport_total];
  my $select_text=get_order_text($issuedate,$repcode,'selected');
  if(not $select_text) {
    if($verbose) {
      print "I have no selected_text at $repcode, $issuedate\n";
    }
    return;
  }
  my @select_lines=split("\n",$select_text);
  ## when the selection holds as many lines as the allport issue has
  ## texts, the ordered part is used in its place
  if(scalar(@select_lines) == $allport_total) {
    if($verbose) {
      print "All papers select, I am trying ordered at $repcode, $issuedate\n";
    }
    $select_text=get_order_text($issuedate,$repcode,'ordered');
    if(not $select_text) {
      if($verbose) {
        print "I have no ordered_text at $repcode, $issuedate\n";
      }
      return;
    }
    @select_lines=split("\n",$select_text);
  }
  my $select;
  foreach my $line (@select_lines) {
    my $id=get_id($line);
    $select->{$id}=1;
  }
  my $ps_text=get_order_text($issuedate,$repcode,'presorted');
  if(not $ps_text) {
    print "I have no ps_text at $repcode, $issuedate\n";
    return;
  }
  my @ps_lines=split("\n",$ps_text);
  ## the criterium is the third field of the first presorted line
  my @parts=split(' ',$ps_lines[0] // '');
  my $crit=$parts[2] // '';
  my $out='';
  ## every presorted line takes up a position, selected or not
  my $pos=0;
  foreach my $line (@ps_lines) {
    $pos++;
    my $id=get_id($line);
    if(not defined($select->{$id})) {
      next;
    }
    my $sps=$a_h_c->{$issuedate}->{$id} // 0;
    if(not $sps) {
      ## This is a data inconsistency. It is reported like the other
      ## errors of this script, rather than with a plain exit that
      ## would end the run with a success status.
      confess "I can't find $id in the allport issue of $issuedate: "
        .Dumper($a_h_c->{$issuedate});
    }
    $out.="  <text pos=\"$pos\" allpos=\"$sps\"/>\n";
    push(@{$eval_args},$pos);
  }

  ## now create the head line
  my $head=" <issue date=\"$issuedate\" total=\"$allport_total\"";
  my $ponori=Ernad::Learn::Eval::ponori($eval_args,$ponori_y);
  if(defined($ponori)) {
    $head.=" ponori=\"$ponori\"";
  }
  else {
    print "I have no ponori for $issuedate of $repcode\n";
  }
  if($crit) {
    $head.=" crit=\"$crit\"";
  }
  $head.=">\n";
  return "$head$out </issue>\n";
}

## Fetch the order text of one part of a report for an issue date.
##
## Parameters:
##   issuedate -- the issue date to look for
##   repcode   -- optional, $e->get_allport_repcode() by default
##   part      -- optional key into the 'dir' hash of the report,
##                'sent' by default
##
## The latest rif for the issue date in the directory of that part is
## run through $e->{'t'}->t() with 'show_order' and 'chars', and the
## result is returned. The empty string is returned when there is no
## rif. Confesses when the rif is not a plain file.
sub get_order_text {
  my ($issuedate,$repcode,$part)=@_;
  $repcode //= $e->get_allport_repcode();
  $part //= 'sent';
  my $part_dir=$e->{'report'}->{$repcode}->{'dir'}->{$part};
  my $rif=$e->{'d'}->latest_rif($part_dir,$issuedate);
  unless($rif) {
    print "I don't have a rif for $repcode $issuedate in $part\n" if $verbose;
    return '';
  }
  -f $rif or confess "I can't open rif $rif.";
  my $order_text=$e->{'t'}->t($rif,'show_order','chars');
  return $order_text;
}
