package Ernad::Presort::Chopa; use strict; use warnings; use Carp qw(confess); use Data::Dumper; use File::Basename; use File::Slurper; use File::Path; use base ('Ernad::Presorter'); use Ernad::Common; ## returns the repcode sub setup { my $c=shift; my $repcode=$c->get_repcode() // confess "I can't get the repcode."; my $e=$c->{'e'} // confess "I need an erimp here."; my $learn_dir=$e->{'dir'}->{'learn'}; ## set_dir can't be used here, as we use the report my $chopa_dir=$learn_dir.'/chopa/'.$repcode; if(not -d $chopa_dir) { $c->echo(__LINE__,"I make $chopa_dir."); mkpath($chopa_dir); } $c->{'dir'}->{'chopa'}=$chopa_dir; my $rerc=$e->{'report'}->{$repcode}; #if(not $rerc) { # ## Osborne transitory code # if($e->{'r'}) { # $e->{'r'}->load($repcode); # $rerc=$e->{'report'}->{$repcode}; # } # else { ## confess "I don't see your report $repcode"; # } #} #my $rerc=$e->{'report'}->{$repcode}; if(not $rerc) { $e->{'r'}->load($repcode); $rerc=$e->{'report'}->{$repcode}; } if(not $rerc) { confess "I don't know about your report '$repcode'."; } return $repcode; } sub status_by_handle_and_issuedate { my $c=shift; my $date=shift // confess "I need a date here."; my $handle=shift // confess "I need a handle here."; my $e=$c->{'e'} // $main::e // confess "I need my erimp here."; if(not $c->{'data'}) { ## populates the data $c->list(); } if(not $c->{'data'}->{$date}) { if($e->{'conf'}->{'sloppyChopa'}) { $e->echo(__LINE__,"WARN: I have no chopa data for $date.",1); return ''; } confess "I have no chopa data for $date."; } if(not defined($c->{'data'}->{$date}->{$handle})) { $c->echo(__LINE__,"I have no chopa data at $date for $handle."); ## try again $c->get_learn_base(); if(not defined($c->{'data'}->{$date}->{$handle})) { if($e->{'conf'}->{'sloppyChopa'}) { $e->echo(__LINE__,"WARN: I have no chopa data for '$handle' at '$date'.",1); return ''; } confess "I have no chopa data at for '$handle' at '$date'."; } } return $c->{'data'}->{$date}->{$handle}; } ## main sub update { my $c=shift; #c->set_report($report); ## this also sets the is_mature binary $c->set_sent_dates(); if(not $c->{'report'}->{$c->{'repcode'}}->{'is_mature'} and not $c->{'e'}->{'conf'}->{'no_seeds'}) { $c->get_seed_base(); } $c->get_learn_base(); } ## deals only with the seeds sub get_seed_base { my $c=shift; use Ernad::Presort::Seeds; $c->{'s'}=Ernad::Presort::Seeds->new(); my $sidin_file=$c->{'s'}->get_sidin_amf_file($c->{'repcode'}); if(not $sidin_file) { return 0; } ## the seed date is set in the Exfits # use Ernad::Presort::Exfit; # $c->{'f'}=Ernad::Presort::Exfit->new(); # $c->{'f'}->set_report($c->{'repcode'}); ## this was previously commented #my $seed_date=$c->{'f'}->set_seed_date(); my $seed_date=$c->{'s'}->set_seed_date(); my $chopa_dir=$c->{'dir'}->{'chopa'}; my $out_file=$chopa_dir.'/'.$seed_date.'.txt'; if(not &Ernad::Common::does_file_need_renewal($out_file,$sidin_file)) { return 0; } my $e=$c->{'e'} // $main::e // confess 'Where is my erimp?'; my $handles=$e->{'x'}->papids($sidin_file,'hash'); ## set to +1 foreach my $handle (keys %$handles) { $c->{'issues'}->{$seed_date}->{$handle}='+1'; } } sub set_sent_dates { my $c=shift; ## get all sent dates, not just the ones in the primitive range? my $do_all=shift // ''; my $repcode=$c->setup() // confess "I need a report here."; ## first get a list of all dates that we have sent issues for $c->{'rerc'}=$c->{'e'}->{'report'}->{$repcode} // confess "I need a rerc here."; my $sent_dir=$c->{'rerc'}->{'dir'}->{'sent'}; undef $c->{'dates'}->{'sent'}; $c->{'report'}->{$c->{'repcode'}}->{'is_mature'}=0; # my $sent_rifs=&Ernad::Common::get_latest_rif_from_each_issue_in_dir($sent_dir); my $e=$c->{'e'} // $main::e // confess 'Where is my erimp?'; my $sent_rifs=$e->{'d'}->get_latest_rif_from_each_issue_in_dir($sent_dir); foreach my $sent_rif (@$sent_rifs) { my $issuedate=$e->{'f'}->issuedate($sent_rif); if(not $c->is_issuedate_in_range($issuedate)) { ## mark the report as mature $c->{'report'}->{$c->{'repcode'}}->{'is_mature'}=1; if(not $do_all) { next; } } $c->{'dates'}->{'sent'}->{$issuedate}=1; } my $oldest_sent_date=$c->{'e'}->{'d'}->oldest_issuedate($sent_dir) // ''; $c->{'dates'}->{'oldest_sent'}=$oldest_sent_date; } ## all papers that we have decisions for, not including the sidin paper sub get_learn_base { my $c=shift; ## target can be sele or sent my $target=shift // ''; my $repcode=$c->setup() // confess "I need a report here."; my $e=$c->{'e'} // $main::e // confess 'Where is my erimp?'; ## first get a list of all dates that we have sent issues for my $rerc=$e->{'report'}->{$repcode} // confess "I need a rerc here."; my $sent_dir=$e->{'report'}->{$repcode}->{'dir'}->{'sent'}; my $sele_dir=$e->{'report'}->{$repcode}->{'dir'}->{'selected'}; if(not -d $sent_dir) { return ''; } if($target) { $c->set_sent_dates('do_all'); } else { $c->set_sent_dates(); } ## sets $c->{'dates_to_renew'}; if(not defined($c->{'dates_to_renew'})) { $c->set_dates_to_renew(); } ## the dates for which an issue has been sent my $ps_dir=$rerc->{'dir'}->{'presorted'}; if(not -d $ps_dir) { confess "I don't see your directory $ps_dir"; } my $ps_rifs; if(not $c->{$repcode}->{'base_rifs'}) { # $ps_rifs=&Ernad::Common::get_latest_rif_from_each_issue_in_dir($ps_dir); $ps_rifs=$e->{'d'}->get_latest_rif_from_each_issue_in_dir($ps_dir); $c->{$repcode}->{'base_rifs'}=$ps_rifs; } ## FixMe: I should not have to do the same for seeded rifs if(not $e->{'conf'}->{'no_seeds'}) { my $sd_dir=$rerc->{'dir'}->{'seeded'} // ''; if($sd_dir) { #my $sd_rifs=&Ernad::Common::get_latest_rif_from_each_issue_in_dir($sd_dir); my $sd_rifs=$e->{'d'}->get_latest_rif_from_each_issue_in_dir($sd_dir); ## in repeat seeding (after the selector did not do her job the first week), ## seeded rifs are in ps, so there may be nothing in sd if($sd_rifs) { push(@{$c->{$repcode}->{'base_rifs'}},@{$sd_rifs}); } } } ## no rifs at all? if(not $c->{$repcode}->{'base_rifs'}) { $c->echo(__LINE__,"I see no base rifs. This must be a virgin report."); ## but we still have to update for the seed issue foreach my $date (sort keys %{$c->{'issues'}}) { $c->update_for_date($date); } return 0; } ## FixMe, I should not have a sort here but it's there for debugging foreach my $base_rif (sort @{$c->{$repcode}->{'base_rifs'}}) { $c->echo(__LINE__,"I found $base_rif"); my $base_date=$e->{'f'}->issuedate($base_rif); ## it has not been sent or it is out of range if(not $c->{'dates'}->{'sent'}->{$base_date}) { $c->echo(__LINE__,"I take no chopas from $base_date because has not been sent."); next; } ## check if the outfile for the date is there, then it's almost ## surely useless to do the calculations again. The out_file will ## only be generated if there is is learning information my $chopa_dir=$c->{'dir'}->{'chopa'}; my $out_file=$chopa_dir.'/'.$base_date.'.txt'; if(-f $out_file) { my $chopa_date=$e->{'f'}->issuedate($out_file); if(not $c->{'dates_to_renew'}->{$chopa_date}) { $c->echo(__LINE__,"I see $out_file, and it's date is not in dates_to_renew."); } next; } my $handles=$e->{'x'}->papids($base_rif,'hash'); ## set to -1 foreach my $handle (keys %$handles) { $c->{'issues'}->{$base_date}->{$handle}='-1'; } } ## baseline: target rifs are the selecte # my $target_rifs=&Ernad::Common::get_latest_rif_from_each_issue_in_dir($sele_dir); #my $target_rifs=&Ernad::Common::get_latest_rif_from_each_issue_in_dir($sele_dir); my $target_rifs=$e->{'d'}->get_latest_rif_from_each_issue_in_dir($sele_dir); if($target eq 'sent') { #$target_rifs=&Ernad::Common::get_latest_rif_from_each_issue_in_dir($sent_dir); $target_rifs=$e->{'d'}->get_latest_rif_from_each_issue_in_dir($sent_dir); } foreach my $sele_rif (@$target_rifs) { my $sele_date=$e->{'f'}->issuedate($sele_rif); ## it has not been sent or it is out of range if(not $c->{'dates'}->{'sent'}->{$sele_date}) { $c->echo(__LINE__,"I take no chopas from $sele_date because has not been sent."); next; } if(not $c->is_to_renew($sele_date,$target)) { $c->echo(__LINE__,"I don't renew $sele_date."); next; } my $handles=$e->{'x'}->papids($sele_rif,'hash'); ## set to +1 foreach my $handle (keys %$handles) { $c->{'issues'}->{$sele_date}->{$handle}='+1'; } } foreach my $date (sort keys %{$c->{'issues'}}) { $c->update_for_date($date); } $c->clear($c->{'dir'}->{'chopa'}); } sub is_to_renew { my $c=shift; my $date=shift; my $target=shift // ''; my $chopa_dir=$c->{'dir'}->{'chopa'}; my $out_file=$chopa_dir.'/'.$date.'.txt'; if($target) { $out_file=$chopa_dir.'/'.$date.'_'.$target.'.txt'; } if(not -f $out_file) { return 1; } if(not $c->{'dates_to_renew'}->{$date}) { $c->echo(__LINE__,"I see $out_file, and it's date is not in dates_to_renew."); } return 0; } sub set_dates_to_renew { my $c=shift; $c->setup(); my $repcode=$c->get_repcode(); my $rerc=$c->{'e'}->{'report'}->{$repcode} // confess "I need a rerc here."; my $sent_dir=$c->{'e'}->{'report'}->{$repcode}->{'dir'}->{'sent'}; my $sele_dir=$c->{'e'}->{'report'}->{$repcode}->{'dir'}->{'selected'}; if(not -d $sent_dir) { return ''; } $c->set_sent_dates(); ## the dates for which an issue has been sent my $ps_dir=$rerc->{'dir'}->{'presorted'}; if(not -d $ps_dir) { confess "I don't see your directory $ps_dir."; } my $chopa_dir=$c->{'dir'}->{'chopa'} // confess 'I have no chopa_dir'; my $glob="$chopa_dir/*.txt"; my @files=glob("$chopa_dir/*.txt"); ## check if there is seeding to be don $c->{'dates_to_renew'}={}; if($c->{'e'}->{'seedable'}) { my $first=shift(@files) // ''; if($first) { $c->echo(__LINE__,"The report is seedable. I skip $first at update check."); } $c->echo(__LINE__,"I found no chopa files looking for $glob."); } my $count_to_renew; my $e=$c->{'e'} // $main::e // confess 'Where is my erimp?'; foreach my $chopa_file (sort @files) { $c->echo(__LINE__,"I see chopa file $chopa_file."); my $issuedate=$e->{'f'}->issuedate($chopa_file); foreach my $sele_file (glob("$sele_dir/$issuedate*")) { if(-M $sele_file < -M $chopa_file) { $c->{'dates_to_renew'}->{$issuedate}=1; } } foreach my $sent_file (glob("$sent_dir/$issuedate*")) { if(-M $sent_file < -M $chopa_file) { $c->{'dates_to_renew'}->{$issuedate}=1; } } } if(not $count_to_renew) { $c->echo(__LINE__,"I found no chopa to renew."); } } sub update_for_date { my $c=shift; my $report=$c->setup(); my $date=shift // confess "I need a date set here"; my $string=''; ## make the issues strings ## first included, then excluded, by handle order foreach my $handle (sort keys %{$c->{'issues'}->{$date}}) { my $state=$c->{'issues'}->{$date}->{$handle}; if(not $state) { confess "I have no state for $date and $handle\n"; } if($state == -1) { next; } $string.="$state $handle\n"; } foreach my $handle (sort keys %{$c->{'issues'}->{$date}}) { my $state=$c->{'issues'}->{$date}->{$handle}; if($state == 1) { next; } $string.="$state $handle\n"; } my $chopa_dir=$c->{'dir'}->{'chopa'}; my $file=$chopa_dir.'/'.$date.'.txt'; if(-f $file) { my $txt=File::Slurper::read_text($file); if($string eq $txt) { return 0; } } $c->echo(__LINE__,"I write $file."); File::Slurper::write_text($file,$string); } ## lists the chopa file, only old version sub list { my $c=shift; $c->setup(); $c->{'all'}={}; my $chopa_dir=$c->{'dir'}->{'chopa'} // confess 'I have no chopa_dir'; my @files=glob("$chopa_dir/????-??-??.txt"); foreach my $file (@files) { $c->echo(__LINE__,"chopa->list sees $file"); my $bana=basename($file); if(not $bana=~m|^(\d{4}-\d{2}-\d{2})|) { $c->echo(__LINE__,"I don't see a date in $file, I skip it."); next; } my $issuedate=$1; if(not $c->is_issuedate_in_range($issuedate)) { if(-M $file > 1) { ## remove an out-of-range file $c->echo(__LINE__,"I remove the out-of-range file $file."); unlink $file; next; } $c->echo(__LINE__,"I keep the recent out-of-range file $file."); } my $date=$1; my @lines=File::Slurper::read_lines($file); foreach my $line (@lines) { $c->{'dates'}->{$date}=1; my @parts=split(' ',$line); $c->{'data'}->{$date}->{$parts[1]}=$parts[0]; } } return $c->{'all'}; } sub get_hash_by_date { my $c=shift; my $date=shift // confess "I need an date here"; my $given_indic=shift // '+1'; my $target=shift // ''; my $chopa_dir=$c->{'dir'}->{'chopa'} // $c->set_dir('chopa'); my $file=$chopa_dir.'/'.$date.'.txt'; if($target) { $file=$chopa_dir.'/'.$date.'.'.$target.'txt'; } if(not $file) { return undef; } my $out={}; my @lines=File::Slurper::read_lines($file); foreach my $line (@lines) { $c->{'dates'}->{$date}=1; my @parts=split(' ',$line); my $indic=$parts[0]; my $papid=$parts[1]; if($indic eq $given_indic) { $out->{$papid}=$indic; } } return $out; } 1;