package Ernad::Presort::Fidek;

use strict;
use warnings;

use base ('Ernad::Presorter');

use Carp qw(confess);
use Data::Dumper;
use File::Basename;

use Ernad::Presort::Dates;
use Ernad::Presort::Exfit;
use Ernad::Presort::Fidek::Term;
use Ernad::Presort::Fidek::Frex;
use Ernad::Presort::Model;
use Ernad::Presort::Ranfi;
use Ernad::Presort::State;
use Krichel::File;

## main function
##
## Brings the fidek files of one report up to date. For every exfit file
## that show_to_update() of the exfit object lists, build_file() is called.
##
## Arguments: the repcode (required, confess otherwise).
##
## Behaviour depends on $k->motto():
##   'train'  the fidek directory is cleared first and '_train' is handed
##            to show_to_update() as the extra.
##   'class'  a ranid common to model and ranfi is needed. If none is found,
##            Ernad::Presort::Learn::model() is run once and the search is
##            repeated. Dates that are futile, 'done' or 'prenatal' are
##            skipped.
##
## Returns 0 when the class motto finds no ranid. Otherwise the scalar
## result of the final clear_dir() call is passed on; clear_dir() sets no
## explicit return value, so callers should not rely on it.
sub update_for_report {
  my $k=shift;
  my $repcode=shift // confess "I need a repcode here.";
  $k->setup();
  ## extra to restrict in destination
  my $extra;
  if(not $k->{'f'}) {
    $k->{'f'}=Ernad::Presort::Exfit->new({'e'=>$k->{'e'}});
  }
  if($k->motto() eq 'train') {
    ## the local version similar to the general clear
    $k->clear_dir();
    $extra='_train';
  }
  if($k->motto() eq 'class') {
    ## here the fidek has to find an agreement between model and ranfi
    my $ranid=$k->concord_ranid();
    ## make a second attempt
    if(not $ranid) {
      ## 'use' is executed at compile time, so the module is loaded
      ## whether or not this branch is ever taken.
      use Ernad::Presort::Learn;
      ## it's a subroutine, not an object
      &Ernad::Presort::Learn::model($repcode,$k->{'e'});
      $ranid=$k->concord_ranid();
    }
    if(not $ranid) {
      $k->echo(__LINE__,"I have no model for $repcode.");
      return 0;
    }
    $k->{'ranid'}=$ranid;
    $k->{'rank'}=$k->{'r'}->load($ranid);
    ## set the extra
    $extra=$ranid;
  }
  my $f=$k->{'f'};
  $f->set_report($repcode);
  $k->set_report($repcode);
  my $fidek_dir=$k->set_dir('fidek');
  ## the returned directory is not needed here, the call is kept as it
  ## was in case set_dir() records the directory on the exfit object
  $f->set_dir('exfit');
  my $exfit_files=$f->show_to_update($fidek_dir,$k->motto(),$extra);
  ## an undefined result is treated as "nothing to update"
  foreach my $exfit_file (sort keys %{ $exfit_files // {} }) {
    ## dont make class fideks when we have learnt the issue
    if($k->motto() eq 'class') {
      my $date=&Ernad::Presort::Dates::find_issuedate($exfit_file);
      if($k->{'e'}->{'s'}->is_futile($repcode,$date)) {
        $k->echo(__LINE__,"I class. $date is futile for $repcode.");
        next;
      }
      $k->echo(__LINE__,"$date is not futile for $repcode.");
      ## but the above is not enough, we also need to check the state of
      ## the date, because we may have empty report issues that don't make
      ## a train file
      my $state=$k->{'t'}->find($date) // '';
      if(($state eq 'done') or ($state eq 'prenatal')) {
        $k->echo(__LINE__,"I don't make a class fidek for $state date $date.");
        next;
      }
    }
    $k->build_file($exfit_file);
  }
  return scalar $k->clear_dir();
}

## FixMe: should go somewhere else.
## find concordance ranid
##
## Looks for a ranid that is listed both by the model object and by the
## ranfi object (list_by('ranid') on each). Among the common ranids the
## one whose model entry has the greatest 'shoti', compared as a string,
## wins. Ranids are visited in sorted order, so a tie on 'shoti' always
## resolves to the same ranid.
##
## Returns the chosen ranid and stores it in $k->{'ranid'}, or returns 0
## when either list is missing or no ranid is shared.
sub concord_ranid {
  my $k=shift;
  ## concord_ranid is callable externally
  $k->setup();
  my $model_list=$k->{'m'}->list_by('ranid');
  if(not $model_list) {
    $k->echo(__LINE__,'I have no model.');
    return 0;
  }
  my $ranfi_list=$k->{'r'}->list_by('ranid');
  if(not $ranfi_list) {
    $k->echo(__LINE__,'I have no ranfi.');
    return 0;
  }
  my $max_shoti='';
  my $best_ranid='';
  foreach my $ranid (sort keys %$model_list) {
    if(not $ranfi_list->{$ranid}) {
      next;
    }
    ## use the model time to find out how recent the ranid is; a
    ## missing shoti counts as the empty string
    my $shoti=$model_list->{$ranid}->{'shoti'} // '';
    ## the first common ranid is the fallback when no shoti is set
    if(not $best_ranid) {
      $best_ranid=$ranid;
    }
    if(($shoti cmp $max_shoti) > 0) {
      $max_shoti=$shoti;
      $best_ranid=$ranid;
    }
  }
  if(not $best_ranid) {
    $k->echo(__LINE__,"I can not concord on a ranid.");
    return 0;
  }
  $k->echo(__LINE__,"I concord on ranid $best_ranid");
  $k->{'ranid'}=$best_ranid;
  return $best_ranid;
}

## Creates the helper objects that are not there yet. Existing ones are
## left alone. All are built with the erimp in $k->{'e'}.
##   'f' exfit, 'r' ranfi, 'm' model, 't' state,
##   'a' frex, only when the configuration has 'frast' set,
##   'x' frex and 'w' term, which get the motto when they are created.
sub setup {
  my $k=shift;
  if(not $k->{'f'}) {
    $k->{'f'}=Ernad::Presort::Exfit->new({'e'=>$k->{'e'}});
    $k->{'f'}->{'repcode'}=$k->{'repcode'};
  }
  if(not $k->{'r'}) {
    $k->{'r'}=Ernad::Presort::Ranfi->new({'e'=>$k->{'e'}});
  }
  if(not $k->{'m'}) {
    $k->{'m'}=Ernad::Presort::Model->new({'e'=>$k->{'e'}});
  }
  if(not $k->{'t'}) {
    $k->{'t'}=Ernad::Presort::State->new({'e'=>$k->{'e'}});
  }
  if($k->{'e'}->{'conf'}->{'frast'}) {
    if(not $k->{'a'}) {
      $k->{'a'}=Ernad::Presort::Fidek::Frex->new({'e'=>$k->{'e'}});
    }
  }
  if(not $k->{'x'}) {
    $k->{'x'}=Ernad::Presort::Fidek::Frex->new({'e'=>$k->{'e'}});
    $k->{'x'}->motto($k->motto());
  }
  if(not $k->{'w'}) {
    $k->{'w'}=Ernad::Presort::Fidek::Term->new({'e'=>$k->{'e'}});
    $k->{'w'}->motto($k->motto());
  }
}

## Removes outdated files from the fidek directory.
##   1. When $k->{'ranid'} is set, files ending in '.<ranid>.json.gz'
##      whose ranid differs from it are deleted.
##   2. For every file with 'train' in its name, the files matching
##      '<issuedate>*.*.json.gz' are deleted.
## A failing unlink is reported through echo. No explicit return value.
##
## NOTE(review): build_file() in this package names its output
## "<date>_<ranid>.json" and "<date>_train.json", with an underscore
## before the ranid, while both patterns here expect a dot before it.
## Whether they still match current files depends on the shape of dates
## and ranids, which is not visible here -- please confirm.
sub clear_dir {
  my $k=shift;
  my $fidek_dir=$k->set_dir('fidek');
  my $set_ranid=$k->{'ranid'} // '';
  ## delete fideks referring to past ranids
  foreach my $file (glob("$fidek_dir/*")) {
    if($set_ranid and $file=~m|\.([a-z0-9]+)\.json\.gz$|) {
      my $ranid=$1;
      if($ranid ne $set_ranid) {
        $k->echo(__LINE__,"The current ranid is $set_ranid. I clear $file.");
        unlink($file)
          or $k->echo(__LINE__,"I could not clear $file: $!");
      }
    }
  }
  my $e=$k->{'e'} // $main::e // confess 'Where is my erimp?';
  ## remove class files for which we have train files, i.e. past ones
  foreach my $train_file (glob("$fidek_dir/*train*")) {
    my $date=$e->{'f'}->issuedate($train_file);
    foreach my $class_file (glob("$fidek_dir/$date*.*.json.gz")) {
      $k->echo(__LINE__,"I clear class fidek $class_file because I have $train_file.");
      unlink($class_file)
        or $k->echo(__LINE__,"I could not clear $class_file: $!");
    }
  }
}

## Builds one fidek file from one exfit file.
##
## Arguments:
##   in_file       exfit file to read (required)
##   set_out_file  output file; required for motto 'seed', forbidden for
##                 any other motto
##   restrict      optional hashref; when given, only papers whose papid
##                 is a true key in it are used
##
## Every line of every paper is handed to the term ('w') and the frex
## ('x') scanner, which collect into $k->{'fits'}. If anything has been
## collected it is saved with save_json_with_gz().
##
## Output name by motto:
##   'train'  "<fidek_dir>/<date>_train.json"; an existing class file for
##            the current ranid is deleted
##   'class'  "<fidek_dir>/<date>_<ranid>.json"
##   'seed'   set_out_file
##
## Returns 0 when nothing is done (output does not need renewal, no ranid
## for the class motto, or a train file exists while 'separate_doklis' is
## configured). Otherwise returns the fits for the seed motto and the
## output file name for the other mottos. Confesses on bad arguments or
## an unknown motto.
##
## NOTE(review): for the seed motto $k->{'fits'} is not reset, so a later
## call on the same object would add to it -- confirm that this is meant.
## NOTE(review): the output name is returned even when no fits were found
## and therefore nothing was written.
sub build_file {
  my $k=shift;
  my $in_file=shift // confess "I need a file here.";
  my $set_out_file=shift // '';
  my $e=$k->{'e'} // $main::e // confess "I need my erimp here.";
  if($k->motto eq 'seed' and not $set_out_file) {
    confess "For the seed motto, I need an out_file set here.";
  }
  if($set_out_file and $k->motto ne 'seed') {
    confess "With a set_out_file, I need the motto to be 'seed'.";
  }
  if($set_out_file and not -f $set_out_file) {
    &Krichel::File::prepare($set_out_file);
  }
  ## optional indicator to only do certain papids
  my $restrict=shift // '';
  if($restrict and ref($restrict) ne 'HASH') {
    confess "If you give me a restriction, it has to be a hashref.";
  }
  ## exfit
  if(not $k->{'f'}) {
    $k->{'f'}=Ernad::Presort::Exfit->new({'e'=>$k->{'e'}});
    ## FixMe: I should only use one of these lines
    $k->{'f'}->{'report'}=$k->{'report'};
    $k->{'f'}->{'repcode'}=$k->{'repcode'};
  }
  my $f=$k->{'f'} // confess "I need an exfit object here.";
  my $date=&Ernad::Presort::Dates::find_issuedate($in_file);
  my $out_dir=$k->set_dir('fidek');
  my $out_file;
  ## out_file for train
  my $train_out_file="$out_dir/$date".'_train.json';
  ## could be empty at the start of the report
  my $ranid=$k->{'ranid'} // '';
  ## when we class we need a ranid
  if($k->motto() eq 'class' and not $ranid) {
    $k->echo(__LINE__,"I have no ranid, I try to concord.");
    $ranid=$k->concord_ranid();
    if(not $ranid) {
      warn "I have no concorded ranid.";
      return 0;
    }
    ## add_fit() keeps only fits found in $k->{'rank'}. Without the
    ## rank of the ranid just concorded, every fit would be dropped.
    ## concord_ranid() has run setup(), so the ranfi object is there.
    if(not $k->{'rank'}) {
      $k->{'rank'}=$k->{'r'}->load($ranid);
    }
  }
  my $class_out_file="$out_dir/$date"."_$ranid.json";
  my $motto=$k->motto();
  ## set this for the scanners as well
  if(not $k->{'x'}) {
    ## 2018-12-18, in a haste, try to add it back on
    $k->{'x'}=Ernad::Presort::Fidek::Frex->new({'e'=>$k->{'e'}});
  }
  $k->{'x'}->motto($motto);
  if(not $k->{'w'}) {
    $k->{'w'}=Ernad::Presort::Fidek::Term->new({'e'=>$k->{'e'}});
    $k->{'w'}->motto($k->motto());
  }
  if($motto eq 'train') {
    if(not &Ernad::Common::does_file_need_renewal("$train_out_file.gz", "$in_file")) {
      $k->echo(__LINE__,"I don't renew $train_out_file.gz from $in_file");
      return 0;
    }
    ## a train file supersedes the class file of the same date
    if($ranid and -f "$class_out_file.gz") {
      $k->echo(__LINE__,"I see $class_out_file.gz.");
      $k->echo(__LINE__,"I delete $class_out_file.gz because I see $train_out_file");
      unlink("$class_out_file.gz")
        or $k->echo(__LINE__,"I could not delete $class_out_file.gz: $!");
    }
    $out_file=$train_out_file;
  }
  elsif($motto eq 'class') {
    if(not $ranid) {
      $k->echo(__LINE__,"I have no ranid.");
      return 0;
    }
    if(not &Ernad::Common::does_file_need_renewal("$class_out_file.gz", "$in_file")) {
      $k->echo(__LINE__,"I don't renew $class_out_file.gz from $in_file");
      return 0;
    }
    ## don't renew if the $train_out_file exists
    if($e->{'conf'}->{'separate_doklis'}) {
      if(-f "$train_out_file.gz" and not -z "$train_out_file.gz") {
        $k->echo(__LINE__,"I don't renew $class_out_file.gz because I found $train_out_file.gz");
        return 0;
      }
    }
    $out_file=$class_out_file;
  }
  elsif($motto eq 'seed') {
    if(not &Ernad::Common::does_file_need_renewal("$set_out_file.gz", "$in_file")) {
      $k->echo(__LINE__,"I don't renew $set_out_file.gz from $in_file.");
      return 0;
    }
    $out_file=$set_out_file;
  }
  else {
    confess "I don't know about your motto '$motto'";
  }
  while(my $paper=$f->next_paper_from_file($in_file)) {
    my $papid=$paper->{'papid'} // confess "I need a papid here.";
    if($restrict and not $restrict->{$papid}) {
      next;
    }
    foreach my $line (@{$paper->{'lines'}}) {
      ## term
      $k->{'w'}->add_fits($k,$line,$papid);
      ## frex
      $k->{'x'}->add_fits($k,$line,$papid);
    }
  }
  if($k->{'fits'}) {
    $k->save_json_with_gz($k->{'fits'},$out_file);
  }
  if($k->motto eq 'seed') {
    return $k->{'fits'};
  }
  undef $k->{'fits'};
  return $out_file;
}

## called in Fidek::Frex and Fidek::Term
##
## Adds to the count of a fit for a paper in $k->{'fits'}.
##
## Arguments:
##   fit    the fit (required)
##   papid  the paper it was found in (required)
##   adder  optional, one less than the amount to add; defaults to 0,
##          so that a plain call adds 1
##
## With the class motto a fit that has no true entry in $k->{'rank'}
## is ignored.
sub add_fit {
  my $k=shift;
  my $fit=shift // confess "I need a fit here.";
  my $papid=shift // confess "I need a papid here.";
  ## how many to add, minus 1
  my $adder=shift // 0;
  ## we are handed one less than the length we would add
  $adder=$adder+1;
  my $motto=$k->motto();
  if($motto eq 'class') {
    if(not $k->{'rank'}->{$fit}) {
      return;
    }
  }
  if(not defined($k->{'fits'}->{$papid})) {
    $k->{'fits'}->{$papid}->{$fit}=$adder;
    return;
  }
  $k->{'fits'}->{$papid}->{$fit}+=$adder;
}

## lists training fideks
##
## Returns an array reference with the files in the fidek directory
## whose name contains $what (required). $what goes into the glob
## pattern as it is.
sub list {
  my $k=shift;
  my $what=shift // confess "I now need to know 'what' here.";
  my $fidek_dir=$k->set_dir('fidek');
  my $glob="$fidek_dir/*$what*";
  my @files=glob($glob);
  return \@files;
}

## not sure if this is needed
##
## Makes sure there is an exfit object and sets its report, taken from
## the argument or else from $k->{'report'}.
##
## NOTE(review): without a report this only prints a message and still
## calls set_report(). The other subs confess in such a case -- confirm
## which behaviour is wanted.
sub setup_exfits {
  my $k=shift;
  if(not $k->{'f'}) {
    $k->{'f'}=Ernad::Presort::Exfit->new({'e'=>$k->{'e'}})
  }
  my $report=shift // $k->{'report'};
  if(not $report) {
    print "I need a report here.";
  }
  $k->{'f'}->set_report($report);
}

1;