package Ernad::Presort::Vemli;

## Builds, finds, prunes and reads "vemli" files. As written by build_file(),
## a vemli file is a gzipped text file named <date>_<ranid>_<motto>.txt.gz in
## the 'vemli' directory, holding one line per handle of the form
##   <weights> # <uri-escaped handle>
## The motto ('train' or 'class') comes from $v->motto(), which is not
## defined in this file.

use strict;
use warnings;
use base ('Ernad::Presorter');

use Carp qw(cluck confess);
use Data::Dumper;
use File::Basename;
use File::Compare;
use File::Copy;
use File::Temp;
use IO::Uncompress::Gunzip;
use URI::Escape;

use Ernad::Presort::Fidek;
use Ernad::Presort::Model;
use Ernad::Presort::Ranfi;
use Ernad::Presort::Weights;
use Krichel::File;

## Build the vemli files of one report.
##   $report  report code, required (confesses when undefined).
## Returns 0 when there is no model (motto 'class') or when
## set_sources_to_do() finds nothing to do. Otherwise the sub falls off the
## end of its loop, so callers should not rely on the return value.
sub update_for_report {
  my $v=shift;
  my $report=shift // confess "I need a report here.";
  $v->setup();
  ## has to be run for every report
  $v->setup_for_report($report);
  my $motto=$v->motto();
  my $sources;
  if($motto eq 'train') {
    if(not $sources=$v->set_sources_to_do()) {
      $v->echo(__LINE__,"There is nothing to do for $report.");
      return 0;
    }
    if(not $v->{'ranfi'}) {
      ## loads last by default
      $v->{'ranfi'}=$v->{'r'}->load();
    }
  }
  if($motto eq 'class') {
    my $ranid=$v->{'m'}->last_ranid();
    ## if there is no model, just exit
    if(not $ranid) {
      $v->echo(__LINE__,"I have no model.");
      return 0;
    }
    $v->{'ranid'}=$ranid;
    if(not $sources=$v->set_sources_to_do()) {
      $v->echo(__LINE__,"There is nothing to do for $report.");
      return 0;
    }
    if(not $v->{'ranfi'}) {
      $v->echo(__LINE__,"I load");
      $v->{'ranfi'}=$v->{'r'}->load($ranid);
    }
  }
  ## fideks only have training
  ## $sources stays undefined for any other motto, hence the fallback.
  foreach my $fidek_file (sort keys %{ $sources // {} }) {
    $v->echo(__LINE__,"The fidek_file is $fidek_file");
    $v->build_file($fidek_file);
  }
}

## setups that are report independent
## Fills in, only where still missing, the erimp ('e', taken from $main::e),
## the Ranfi ('r'), Fidek ('k') and Model ('m') helpers, then installs the
## weighing function.
sub setup {
  my $v=shift;
  if(not $v->{'e'}) {
    $v->{'e'}=$main::e // confess "I have no erimp.";
  }
  my $args={'e'=>$v->{'e'}};
  if(not $v->{'r'}) {
    $v->{'r'}=Ernad::Presort::Ranfi->new({%$args});
  }
  if(not $v->{'k'}) {
    $v->{'k'}=Ernad::Presort::Fidek->new({%$args});
  }
  ## really only used in ::Class
  if(not $v->{'m'}) {
    $v->{'m'}=Ernad::Presort::Model->new({%$args});
  }
  $v->set_weighing_function();
}

## Point this object and its Ranfi and Fidek helpers at one report and record
## the 'fidek', 'vemli' and 'class' directories.
##   $report  report code; falls back to the erimp's 'repcode', and confesses
##            when neither is available.
sub setup_for_report {
  my $v=shift;
  my $report=shift;
  if(not $v->{'r'}) {
    $v->setup();
  }
  if(not $report) {
    $report=$v->{'e'}->{'repcode'} // confess "I need a report here.";
  }
  $v->set_report($report);
  $v->{'r'}->set_report($report);
  $v->{'k'}->set_report($report);
  foreach my $type ('fidek','vemli','class') {
    $v->{'dir'}->{$type}=$v->set_dir($type);
  }
}

## Look up the configured weighing scheme in
## $Ernad::Presort::Weights::weigh and store the code reference in
## $v->{'weigh'}. Confesses when the configuration names no scheme or names an
## unknown one.
sub set_weighing_function {
  my $v=shift;
  my $scheme=$v->{'e'}->{'conf'}->{'weighing_scheme'}
    or confess "I need a weighing scheme in the ernad configuration.";
  my $weigh=$Ernad::Presort::Weights::weigh->{$scheme}
    // confess "The weighing scheme $scheme appears not to be defined.";
  $v->{'weigh'}=$weigh;
}

## Build the vemli file for one fidek file.
##   $fidek_file  path of the fidek file; its issue date is taken from the
##                file name through the erimp's 'f' helper.
## Returns the path of the vemli file when it was (re)written. Returns 0 when
## the date is out of range (the fidek file is then removed), when no renewal
## is needed, or when the new content equals the existing vemli file.
## Confesses on an invalid date, a missing ranid, or a failing gzip or copy.
sub build_file {
  my $v=shift;
  my $fidek_file=shift;
  my $e=$v->{'e'} // $main::e // confess 'Where is my erimp?';
  my $date=$e->{'f'}->issuedate($fidek_file);
  if(not Ernad::Dates::is($date)) {
    confess "You made up a date '$date'. I won't have it.";
  }
  ## defined in the base class
  if(not $v->is_issuedate_in_range($date)) {
    $v->echo(__LINE__,"The date '$date' in '$fidek_file' is out of range, I remove it.");
    if(-f $fidek_file) {
      unlink $fidek_file;
    }
    return 0;
  }
  my $ranid=$v->{'ranid'} // confess "I need a ranid defined here.";
  my $motto=$v->motto();
  my $out_file=$v->set_dir('vemli')."/${date}_${ranid}_${motto}.txt.gz";
  if(not Ernad::Common::does_file_need_renewal($out_file,$fidek_file)) {
    $v->echo(__LINE__,"I skip the renewal of $out_file over $fidek_file");
    return 0;
  }
  Krichel::File::prepare($out_file);
  my $tmp_fh=File::Temp->new();
  my $tmp_name=$tmp_fh->filename();
  my $fidek=$v->load_json_with_gz($fidek_file);
  foreach my $handle (sort keys %$fidek) {
    my $out=$v->prep($handle,$fidek->{$handle},$fidek_file);
    if(not $out) {
      next;
    }
    ## drop one leading whitespace character, if any
    $out=~s|^\s||;
    print {$tmp_fh} $out;
  }
  $tmp_fh->close() or confess "I can't close $tmp_name: $!";
  ## List form keeps the shell out of it. -n stops gzip from storing the
  ## random temporary name and the timestamp in the header; with them the
  ## comparison below could practically never find two files equal.
  system('/bin/gzip','-n','-f',$tmp_name) == 0
    or confess "I could not gzip $tmp_name, the exit status is $?.";
  my $tmp_file=$tmp_name.'.gz';
  if(-f $out_file and compare($tmp_file,$out_file) == 0) {
    unlink $tmp_file;
    return 0;
  }
  copy($tmp_file,$out_file)
    or confess "I could not copy $tmp_file to $out_file: $!";
  ## gzip renamed the temporary file, so File::Temp no longer removes it.
  unlink $tmp_file;
  return $out_file;
}

## Work out which fidek files still need a vemli file for the current report.
## Sets $v->{'ranid'} from the Ranfi (motto 'train') or from the Model (motto
## 'class') and hands source/destination name restrictions to get_sources(),
## which is not defined in this file.
## Returns the hash reference from get_sources(), also stored under
## $v->{'sources'}->{<repcode>}, or 0 when there is nothing to do.
sub set_sources_to_do {
  my $v=shift;
  if(not $v->{'repcode'}) {
    confess "I need a report set here.";
  }
  my $motto=$v->motto();
  ## to be determined via the motto
  my $restrict;
  if($motto eq 'train') {
    my $ranid=$v->{'r'}->find_last_ranid();
    $v->{'ranid'}=$ranid;
    $restrict->{'source'}="_train";
    $restrict->{'destin'}="_${ranid}_train";
  }
  if($motto eq 'class') {
    my $ranid=$v->{'m'}->last_ranid();
    $v->{'ranid'}=$ranid;
    $restrict->{'destin'}="${ranid}_class";
    $restrict->{'source'}="$ranid";
  }
  my $sources=$v->get_sources($v->{'dir'}->{'fidek'},
                              $v->{'dir'}->{'class'},
                              $restrict);
  ## nothing to do? An undefined result is treated like an empty one.
  if(not $sources or not scalar(keys %$sources)) {
    ## NOTE(review): the key is the literal string 'repcode', not the current
    ## report code used a few lines below. Kept as found; confirm the intent.
    $v->{'sources'}->{'repcode'}=-1;
    return 0;
  }
  $v->{'sources'}->{$v->{'repcode'}}=$sources;
  return $sources;
}

## Format one vemli line.
##   $handle  handle of the paper, uri-escaped on output
##   $fits    passed to the weighing function together with $v->{'ranfi'}
##   $file    only used in the log message
## Returns "<weights> # <escaped handle>\n", or 0 when the weighing function
## returns nothing true.
sub prep {
  my $v=shift;
  my $handle=shift;
  my $fits=shift;
  ## just for reporting
  my $file=shift;
  my $wm=$v->{'weigh'} // confess "I need a weighing scheme";
  my $weights=$wm->($fits,$v->{'ranfi'});
  my $safe_handle=uri_escape($handle);
  if(not $weights) {
    $v->echo(__LINE__,"I have no weights for handle $handle found in $file.",2);
    return 0;
  }
  return "$weights # $safe_handle\n";
}

## Collect the vemli files of a ranid and the current motto.
##   $ranid  optional; defaults to the Ranfi's last ranid.
## Files whose issue date is out of range are deleted. clear_files() is then
## run for the ranid. Returns $v->{'files'}, a hash reference keyed by file
## path (empty when nothing was found).
sub find {
  my $v=shift;
  my $ranid=shift // '';
  my $vemli_dir=$v->set_dir('vemli');
  if(not $v->{'r'}) {
    $v->setup();
  }
  if(not $ranid) {
    $ranid=$v->{'r'}->find_last_ranid();
  }
  $v->{'ranid'}=$ranid;
  my $motto=$v->motto();
  ## fill ->{'files'}
  my $glob="$vemli_dir/*_${ranid}_${motto}.txt.gz";
  my @vemli_files=glob($glob);
  undef($v->{'files'});
  my $count_files=0;
  my $e=$v->{'e'} // $main::e // confess 'Where is my erimp?';
  foreach my $vemli_file (@vemli_files) {
    my $date=$e->{'f'}->issuedate($vemli_file);
    if(not Ernad::Dates::is($date)) {
      confess "You made up a date '$date'. I won't have it.";
    }
    ## defined in the base class
    if(not $v->is_issuedate_in_range($date)) {
      $v->echo(__LINE__,"The date '$date' in '$vemli_file' is out of range, I remove it.");
      unlink $vemli_file;
      next;
    }
    $count_files++;
    $v->{'files'}->{$vemli_file}=1;
  }
  $v->clear_files($ranid);
  ## no files found?
  if(not $count_files) {
    $v->{'files'}={};
  }
  return $v->{'files'};
}

## Remove the vemli files of the current motto that do not belong to a ranid.
##   $ranid  optional; defaults to the Ranfi's find_concorded().
## Does nothing and returns 0 when the configuration has an
## 'allport_repcode'. Otherwise returns the number of files removed.
sub clear_files {
  my $v=shift;
  my $ranid=shift // '';
  my $vemli_dir=$v->set_dir('vemli');
  if(not $ranid) {
    $v->echo(__LINE__,"I go to find the last ranid.");
    if(not $v->{'r'}) {
      confess "I need this defined.";
    }
    $ranid=$v->{'r'}->find_concorded();
  }
  else {
    $v->echo(__LINE__,"I have been given a ranid '$ranid'.");
  }
  ## fixme, this needs to be more elegant
  if($v->{'e'}->{'conf'}->{'allport_repcode'}) {
    $v->echo(__LINE__,"I have an allport, no vemli removal.");
    return 0;
  }
  my $motto=$v->motto();
  my @vemli_files=glob("$vemli_dir/*${motto}.txt.gz");
  my $ranid_check='_'.$ranid.'_';
  $v->echo(__LINE__,"I start vemli removal.");
  ## just count how many we delete.
  my $count=0;
  foreach my $file (@vemli_files) {
    my $bana=basename($file);
    ## literal substring test, so that regex metacharacters in the ranid
    ## cannot change what is kept
    if(index($bana,$ranid_check) >= 0) {
      next;
    }
    $v->echo(__LINE__,"I remove $bana because it does not match $ranid_check.");
    if(unlink $file) {
      $count++;
    }
  }
  return $count;
}

## Read the next paper from the files listed in $v->{'files'}.
## Opens the first remaining file (in sorted order) when none is open.
## Returns
##   a hash reference with 'd' (data before the '#'), 'u' (escaped handle),
##     'h' (unescaped handle) and 'i' (issue date of the file),
##   '' when the current file is exhausted (it is then dropped from
##     $v->{'files'}, call again to move on to the next file),
##   undef when no file is left.
## Confesses when $v->{'files'} is undefined, a file can't be opened or a
## line can't be parsed.
sub next_paper {
  my $v=shift;
  if(not defined($v->{'files'})) {
    confess "I need v->{'files'} defined here.";
  }
  if(not $v->{'fh'}) {
    my @files=sort keys %{$v->{'files'}};
    my $file=shift @files;
    if(not defined($file)) {
      $v->echo(__LINE__,"I have no more files defined.");
      ## callers tell undef (nothing left) apart from '' (end of one file)
      return undef;
    }
    $v->echo(__LINE__,"I open $file.");
    $v->{'file'}=$file;
    $v->{'fh'}=IO::Uncompress::Gunzip->new($file)
      or confess "I can't open $file. $IO::Uncompress::Gunzip::GunzipError";
    ## keep the key until the file has been read to its end
    undef $v->{'files'}->{$file};
    $v->{'issuedate'}=$v->deduct_issuedate_from_file($file);
  }
  my $fh=$v->{'fh'} // confess "I have no open file.";
  my $line=<$fh>;
  if(not defined($line)) {
    undef $v->{'fh'};
    delete $v->{'files'}->{$v->{'file'}};
    return '';
  }
  $line=~m|([^#]+)\s+# (.*)| or confess "I can't parse $line.";
  my ($data,$escaped_handle)=($1,$2);
  my $paper={};
  ## data
  $paper->{'d'}=$data;
  ## handle
  $paper->{'u'}=$escaped_handle;
  $paper->{'h'}=uri_unescape($escaped_handle);
  $paper->{'i'}=$v->{'issuedate'};
  return $paper;
}

## Print a readable version of an uncompressed file of lines
##   <indic> <number>:<weight> ... # <escaped handle>
## to STDOUT, translating each number back to its fit.
##   $file        path of the file, read as plain text
##   $ranid       ranid handed to the Ranfi loader
##   $print_mode  'show_full' (default) prints "fit --> weight",
##                'no_weights' prints the fit only
## Side effect: $v->{'ranfi'} is reloaded and then emptied while the reverse
## map is built.
## Confesses on a missing or unreadable file, a missing ranfi, an unparsable
## line or an indic other than -1, +1 or 0.
sub type {
  my $v=shift;
  my $file=shift;
  my $ranid=shift;
  my $print_mode=shift // 'show_full';
  if(not -f $file) {
    confess "No such file $file";
  }
  $v->setup();
  ## loads last by default
  $v->{'ranfi'}=$v->{'r'}->load($ranid);
  if(not $v->{'ranfi'}) {
    confess "I have no fitrank of ranid '$ranid'.\n";
  }
  open(my $fh,'<',$file) or confess "I can't open $file: $!";
  ## opposite ranfi
  my $firan;
  foreach my $fit (keys %{$v->{'ranfi'}}) {
    $firan->{$v->{'ranfi'}->{$fit}}=$fit;
    ## save some memory
    delete $v->{'ranfi'}->{$fit};
  }
  while(my $line=<$fh>) {
    chomp $line;
    $line=~m|^(\S+)\s+([^#]+)\s+# (.*)| or confess "I can't parse $line.";
    my $indic=$1;
    my $data=$2;
    my $handle=uri_unescape($3);
    if(not ($indic eq '-1' or $indic eq '+1' or $indic eq '0')) {
      confess "I don't understand your indic '$indic'";
    }
    print "$handle $indic\n ";
    my @pairs=reverse(split(' ',$data));
    my $pos=0;
    foreach my $pair (@pairs) {
      my @parts=split(':',$pair);
      ## stays empty for an unknown print mode
      my $out='';
      if($print_mode eq 'show_full') {
        $out=$firan->{$parts[0]}.' --> '.$parts[1];
      }
      elsif($print_mode eq 'no_weights') {
        $out=$firan->{$parts[0]};
      }
      my $len=length($out);
      ## start a new output line once the current one would pass 80 columns
      if($pos + $len > 80) {
        print "\n ";
        $pos=0;
      }
      print "$out ";
      $pos=$pos + $len + 2;
    }
    print "\n";
  }
  close($fh);
}

1;