package Ernad::Presort::Ranfi;

use strict;
use warnings;

use base ('Ernad::Presorter');

use Carp qw(confess);
use Data::Dumper;
use File::Basename;
## Loaded without importing anything; store() calls File::Path::mkpath
## by its full name.
use File::Path ();

use Krichel::Shonu;
use Krichel::Shoti;
use Krichel::File;

## NOTE(review): Ernad::Presort::Fidek and Ernad::Common are used below
## but not loaded in this file; presumably the base class or the caller
## loads them -- confirm.

## main
##
## Rebuilds and stores the feature rank ("ranfi") for one report.
##   $report -- the report to work on (required, confess if missing).
## Returns 1 when a rank was calculated and store() was called, 0 when
## no renewal was required or no rank could be formed.
sub update_for_report {
  my $r=shift;
  my $e=$r->{'e'} // $main::e // confess "I have no erimp.";
  my $report=shift // confess "I need a report set here.";
  my $k=Ernad::Presort::Fidek->new({'e'=>$r->{'e'}});
  $r->{'k'}=$k;
  $r->set_report($report);
  $k->set_report($report);
  if(not $r->does_it_require_renewal()) {
    $e->echo(__LINE__,"The ranfi needs no update.");
    return 0;
  }
  if(not $r->calculate_fitrank()) {
    $e->echo(__LINE__,"The rank can not be found.");
    return 0;
  }
  $e->echo(__LINE__,"I store the rank.");
  $r->store();
  ## indicator of success
  return 1;
}

## main
# sub update_for_report_with_fitcla {
#   my $r=shift;
#   my $report=shift // confess "I need a report set here.";
#   my $k=Ernad::Presort::Fidek->new({'e'=>$r->{'e'}});
#   $r->{'k'}=$k;
#   $r->set_report($report);
#   $k->set_report($report);
#   my $fitclas=$r->get_fitclas();
#   my $i_stored_for_a_fitcla=0;
#   foreach my $fitcla (keys %$fitclas) {
#     $k->set_fitcla($fitcla);
#     $r->{'k'}->set_fitcla($fitcla);
#     $r->setup_fidek();
#     if(not $r->does_it_require_renewal()) {
#       next;
#     }
#     my $can_rank_be_formed=$r->calculate_fitrank();
#     if(not $can_rank_be_formed) {
#       next;
#     }
#     $i_stored_for_a_fitcla++;
#     $r->store();
#   }
#   ## indicator of success
#   return $i_stored_for_a_fitcla;
# }

## Sums the feature values of all 'train' fidek files and ranks the
## features by descending sum, ties broken by feature name.
## On success sets
##   $r->{'rank'}  -- hash ref: feature => rank, starting at 1
##   $r->{'total'} -- sum over the kept features, passed through
##                    Krichel::Shonu::make
##   $r->{'time'}  -- the value of Krichel::Shoti::now()
## and returns 1.
## returns 0 on failure, i.e. when the fideks contain no features at all.
sub calculate_fitrank {
  my $r=shift;
  ## FixMe: only works for term
  my $fidek_files=$r->{'k'}->list('train');
  my $count_fits=0;
  foreach my $fidek_file (@$fidek_files) {
    my $fidek=$r->load_json_with_gz($fidek_file);
    $r->echo(__LINE__,"I load $fidek_file");
    foreach my $handle (keys %$fidek) {
      my $fits=$fidek->{$handle};
      foreach my $fit (keys %$fits) {
        ## count features rather than files to make sure
        ## we don't take account of empty fideks
        $count_fits++;
        $r->{'f'}->{$fit}+=$fits->{$fit};
      }
    }
  }
  ## no features, return 0
  if(not $count_fits) {
    return 0;
  }
  ## trim: features whose sum is exactly 1 are dropped and do not
  ## contribute to the total
  $r->{'total'}=0;
  foreach my $fit (keys %{$r->{'f'}}) {
    if($r->{'f'}->{$fit}==1) {
      delete $r->{'f'}->{$fit};
      next;
    }
    $r->{'total'}+=$r->{'f'}->{$fit};
  }
  ## highest sum first, then alphabetically so the order is stable
  my $sum_of=$r->{'f'};
  my @ranked=sort {
    $sum_of->{$b} <=> $sum_of->{$a} or $a cmp $b
  } keys %$sum_of;
  delete $r->{'f'};
  my $count=1;
  my $rank={};
  foreach my $fit (@ranked) {
    $rank->{$fit}=$count++;
  }
  $r->{'rank'}=$rank;
  $r->{'total'}=Krichel::Shonu::make($r->{'total'});
  $r->{'time'}=Krichel::Shoti::now();
  return 1;
}

## Writes $r->{'rank'} to "<ranfi dir>/<time>_<total>.json" through
## save_json_with_gz, unless a file for the same total already exists.
## Requires $r->{'total'} (confess otherwise) and a learnport.
## $r->{'rank'} is deleted in either case.
## Returns the file name handed to save_json_with_gz, or '' when a
## file for that total was already there.
sub store {
  my $r=shift;
  my $e=$r->{'e'} // $main::e // confess "I have no erimp.";
  if(not $r->{'learnport'}) {
    defined($r->get_learnport()) or confess "I need this here.";
  }
  my $out_dir=$r->set_dir('ranfi');
  if(not -d $out_dir) {
    ## File::Path is loaded at the top of the file without imports.
    File::Path::mkpath($out_dir);
  }
  my $total=$r->{'total'} // confess "I must have a total here.";
  ## List context is essential: in scalar context glob() iterates, so
  ## a repeated call from this spot would return the next match or
  ## undef rather than the first match.
  my ($there_already)=glob("$out_dir/*$total.json*");
  if($there_already) {
    delete $r->{'rank'};
    $e->echo(__LINE__,"It's already there $there_already.");
    return '';
  }
  my $time=$r->{'time'};
  my $out_file="$out_dir/${time}_$total.json";
  $e->echo(__LINE__,"I write $out_file");
  $r->save_json_with_gz($r->{'rank'},$out_file);
  delete $r->{'rank'};
  return $out_file;
}

## Makes sure $r->{'k'} holds a fidek object for the current report,
## stores the list of 'train' fidek files in $r->{'fidek_files'} and
## returns that list.
sub setup_fidek {
  my $r=shift;
  if(not $r->{'k'}) {
    $r->{'k'}=Ernad::Presort::Fidek->new({'e'=>$r->{'e'}});
    # $r->{'k'}->set_fitcla($r->{'fitcla'});
    $r->{'k'}->set_report($r->{'report'});
    $r->{'k'}->set_dir('fidek');
  }
  ## called for its effect on the fidek object, the return is not needed
  $r->{'k'}->set_dir('fidek');
  my $list=$r->{'k'}->list('train');
  $r->{'fidek_files'}=$list;
  return $list;
}

## checks if there is a recent fidek
## that would require the ranfi to be updated
## Returns 1 when there is no ranfi yet or when
## Ernad::Common::does_file_need_renewal says so for the last ranfi
## against the fidek files, 0 otherwise.
sub does_it_require_renewal {
  my $r=shift;
  my $e=$r->{'e'} // $main::e // confess "I have no erimp.";
  my $fidek_files=$r->setup_fidek();
  my $last=$r->find_last();
  if(not $last) {
    $e->echo(__LINE__,"There is no last ranfi.");
    return 1;
  }
  if(Ernad::Common::does_file_need_renewal($last,@$fidek_files)) {
    $e->echo(__LINE__,"The last ranfi $last needs renewal.");
    return 1;
  }
  $e->echo(__LINE__,"I don't need to renew the ranfi.");
  return 0;
}

## Finds the last ranfi file in the ranfi directory, and REMOVES all
## the other ranfi files found there.
## Stores the result in $r->{'last'} and returns it; undef when there
## is no ranfi file.
sub find_last {
  my $r=shift;
  my $ranfi_dir=$r->set_dir('ranfi');
  my @ranfi_files=glob("$ranfi_dir/*json*.gz");
  ## since the time comes first in the bana, the pop should give us the last element
  my $last=pop(@ranfi_files);
  ## the array has been shortened.
  foreach my $file (@ranfi_files) {
    my $bana=basename($file);
    $r->echo(__LINE__,"I remove the non-current file $bana.");
    unlink $file
      or $r->echo(__LINE__,"I could not remove $file: $!");
  }
  $r->{'last'}=$last;
  return $last;
}

## Finds the last ranfi file whose name contains $ranid (required).
## Stores it in $r->{'last'} and returns it. Returns '' when there is
## none; $r->{'last'} is then left untouched. Removes no files.
sub find_last_by_ranid {
  my $r=shift;
  my $ranid=shift // confess "I need to know what ranid you want.";
  my $ranfi_dir=$r->set_dir('ranfi');
  my @ranfi_files=glob("$ranfi_dir/*$ranid*json*.gz");
  my $last=pop(@ranfi_files) // return '';
  $r->{'last'}=$last;
  return $last;
}

## Returns the ranid, i.e. the part between "_" and ".json.gz", of
## the last ranfi file ($r->{'last'}, or else what find_last() gives).
## Returns '' when there is no last ranfi or when its name does not
## look like a ranfi.
sub find_last_ranid {
  my $r=shift;
  my $fufi=$r->{'last'} || $r->find_last();
  if(not $fufi) {
    return '';
  }
  my $bana=basename($fufi);
  if($bana=~m|^[a-z0-9]{6}_([a-z0-9]+)\.json\.gz$|) {
    return $1;
  }
  $r->echo(__LINE__,"This $fufi does not look like a ranfi to me.");
  ## $1 is not valid after a failed match, so don't return it.
  return '';
}

## Returns the shoti, i.e. the six leading characters before "_", of
## the last ranfi file ($r->{'last'}, or else what find_last() gives).
## Returns '' when there is no last ranfi; confesses when its name
## does not look like a ranfi.
sub find_last_shoti {
  my $r=shift;
  my $fufi=$r->{'last'} || $r->find_last();
  if(not $fufi) {
    return '';
  }
  my $bana=basename($fufi);
  if(not $bana=~m|^([a-z0-9]{6})_([a-z0-9]+)\.json\.gz$|) {
    confess "This $fufi does not look like a ranfi to me.";
  }
  my $shoti=$1;
  return $shoti;
}

#sub get_by_ranid {
#  my $r=shift;
#  my $ranid=shift // confess "I need to have a ranid here\n";
#  my $ranfi_dir=$r->{'k'}->set_dir('ranfi');
#  my @files=glob("$ranfi_dir/*_$ranid.json*");
#  if(not @files) {
#    confess "I don't see a ranfi of ranid $ranid\n";
#  }
#  ## just in case there are several ... I am being lax here.
#  my $last=pop(@files);
#  return $last;
#}

## Loads a rank into $r->{'rank'} and returns it.
##   $ranid -- optional; when given, the last ranfi for that ranid is
##             loaded, otherwise the last ranfi overall. Mind that
##             the latter goes through find_last(), which removes
##             older ranfi files.
## Returns 0 when no ranfi file is found.
sub load {
  my $r=shift;
  my $ranid=shift // '';
  my $ranfi_file=$ranid ? $r->find_last_by_ranid($ranid)
                        : $r->find_last();
  if(not $ranfi_file) {
    $r->echo(__LINE__,"I have no ranfi file.");
    return 0;
  }
  $r->{'rank'}=Krichel::File::load($ranfi_file);
  return $r->{'rank'};
}

## Lists the ranfi files, keyed by shoti or by ranid.
##   $what -- 'shoti' or 'ranid' (required; anything else confesses,
##            also when there are no files to list)
## Returns a hash ref
##   by shoti: shoti => { 'file' => ..., 'ranid' => ... }
##   by ranid: ranid => { 'file' => ..., 'shoti' => ... }
## or undef when no file with a ranfi-like name is found.
sub list_by {
  my $r=shift;
  my $what=shift // confess "by what?";
  if($what ne 'shoti' and $what ne 'ranid') {
    confess "I don't know by what to list.";
  }
  my $ranfi_dir=$r->set_dir('ranfi');
  my @ranfi_files=glob("$ranfi_dir/*.json.gz");
  my $out;
  foreach my $ranfi_file (@ranfi_files) {
    my $bana=basename($ranfi_file);
    ## same shape as checked in find_last_ranid and find_last_shoti
    my ($shoti,$ranid)=$bana=~m|^([0-9a-z]{6})_([0-9a-z]+)\.json\.gz$|
      or next;
    if($what eq 'shoti') {
      $out->{$shoti}->{'file'}=$ranfi_file;
      $out->{$shoti}->{'ranid'}=$ranid;
    }
    else {
      $out->{$ranid}->{'file'}=$ranfi_file;
      $out->{$ranid}->{'shoti'}=$shoti;
    }
  }
  return $out;
}

1;