package Ernad::Presort::Model; use strict; use warnings; use Carp qw(cluck longmess shortmess croak confess); use Data::Dumper; use File::Basename; use File::Slurper; use base ('Ernad::Presorter'); # use Ernad::Presort::Train; binmode(STDOUT,':utf8'); ## uses the $e->{'repcode'}; sub setup { my $m=shift; my $a=Ernad::Presort::Train->new({'e'=>$m->{'e'}}); $m->{'a'}=$a; my $svm_train_flags=$m->{'e'}->{'conf'}->{'svm_train_flags'} // ''; if($svm_train_flags) { $m->{'svm_train_flags'}=$svm_train_flags; $m->echo(__LINE__,"svm_train_flags set to $svm_train_flags",4); } else { confess "I need svm_train_flags in the ernad configuration."; } } ## callable external sub run { my $m=shift; $m->setup(); my $train_file=$m->{'a'}->build_file(); my $model_file=$train_file; $model_file=~s|\.train|.model|; if(not &Ernad::Common::does_file_need_renewal($model_file, $train_file)) { $m->echo(__LINE__,"I skip the renewal of $model_file."); return $model_file; } my $lock_file=$train_file; $lock_file=~s|\.train|.lock|; $m->echo(__LINE__,"lock_file is $lock_file.",7); if(-f $lock_file) { ## remove the lock after more than one day my $lock_days_old=-M $lock_file; if($lock_days_old > 1) { $m->echo(__LINE__,"I delete an $lock_days_old lock file $lock_file",10); unlink $lock_file; } else { $m->echo(__LINE__,"I find $lock_file, I don't build $model_file",10); return 0 ; } } else { $m->echo(__LINE__,"I don't see that the lock file, so I move on.",10); } my $out_file=$train_file; $out_file=~s|\.train|.out|; my $err_file=$train_file; $err_file=~s|\.train|.err|; my $time_file=$train_file; $time_file=~s|\.train|.time|; ## -b 1 is required to get probablities my $flags="-b 1" . ' '. $m->{'svm_train_flags'}; my $train_bin='/usr/bin/svm-train'; my $time_bin='/usr/bin/time'; my $redir="> $out_file 2> $err_file"; my $s="$time_bin -o $time_file -p $train_bin $flags $train_file $model_file $redir"; my $start=time; $m->echo(__LINE__,"command is $s",1); system("touch $lock_file"); system($s); my $end=time; unlink $lock_file; my $err_text=&File::Slurper::read_text($err_file); if(not $err_text=~m|^\s*$|) { confess "Training has finished with an error '$err_text'."; } my $out_text=&File::Slurper::read_text($out_file); if($out_text=~m|^\s*WARNING$|) { confess "Training has finished with an error '$out_text'."; } my $run_time=$end-$start; $m->echo(__LINE__,"I modelled in $run_time seconds.",1); ## clear the old files in the directory my $model_dir=dirname($model_file); my $model_bana=basename($model_file); $model_bana=~m|([^_]+)_([^_]+)\.| or confess "I can't parse the model file $model_bana."; my $ranid=$2; ## make sure we have a ranid and a model before deleting other files if($ranid and -f $model_file and not -z $model_file) { foreach my $file (glob("$model_dir/*")) { my $bana=basename($file); if(not $bana=~m|_$ranid\.|) { $m->echo(__LINE__,"I remove the non-current file $bana."); unlink $file; } } } return $model_file; } sub delete_incomplete { my $m=shift; my $learn_dir=$m->{'e'}->{'dir'}->{'learn'}; my $repcode=$m->get_repcode(); my $model_dir=$learn_dir.'/model/'.$repcode; my $model_glob="$model_dir/*.model"; my @model_files=glob($model_glob); foreach my $model_file (@model_files) { if(-z $model_file) { #confess "I want to unlink the model_file $model_file."; $m->echo(__LINE__,"I delete the empty model_file $model_file"); unlink $model_file; next; } ## check whether last line only contains numbery chars my $tail=`/usr/bin/tail -1 $model_file`; chomp $tail; ## remove first number from tail, it may contain -e $tail=~s|^\S+\s+||; if(not $tail=~m|^[-0-9:. ]+$|) { ## out of prudence. #confess "I want to unlink the model_file $model_file."; $m->echo(__LINE__,"I delete the useless model '$model_file'."); unlink $model_file; next; } } } sub most_recent { my $m=shift; my $learn_dir=$m->{'e'}->{'dir'}->{'learn'}; my $repcode=$m->get_repcode(); my $model_dir=$learn_dir.'/model/'.$repcode; my @model_files=glob("$model_dir/*.model"); my $last=pop(@model_files) or return 0; return $last; } ## called externally sub last_ranid { my $m=shift; $m->delete_incomplete(); my $last=$m->most_recent() or return 0; my $bana=basename($last); my $handle_string=substr($bana,0,length($bana)-length('.model')); my @handles_parts=split(/_/,$handle_string); my $ranid=$handles_parts[1]; return $ranid; } sub list_by { my $m=shift; my $what=shift // confess "by what?"; my $learn_dir=$m->{'e'}->{'dir'}->{'learn'}; my $repcode=$m->get_repcode(); my $model_dir=$learn_dir.'/model/'.$repcode; my @model_files=glob("$model_dir/*.model"); my $out; foreach my $model_file (@model_files) { my $bana=basename($model_file); if(not $bana=~m|^([0-9a-z]{6})_([0-9a-z]+)\.model|) { next; } if($what eq 'shoti') { $out->{$1}->{'file'}=$model_file; $out->{$1}->{'ranid'}=$2; } elsif($what eq 'ranid') { $out->{$2}->{'file'}=$model_file; $out->{$2}->{'shoti'}=$1; } else { confess "I don't know by what to list." } } return $out; } 1;