package Ernad::Presort::Train;

## Builds the training file for a report from the papers delivered by
## Ernad::Presort::Vemli, using the status looked up through
## Ernad::Presort::Chopa and the ids found by Ernad::Presort::Ranfi.

use strict;
use warnings;

use Carp qw(cluck longmess shortmess croak confess);
use Data::Dumper;
use File::Basename;
use File::Path;
use IO::File;

use base ('Ernad::Presorter');

use Ernad::Presort::Chopa;
use Ernad::Presort::Ranfi;
use Ernad::Presort::Vemli;

binmode(STDOUT,':utf8');

## Creates the helper objects and stores them on the object:
##   'h' an Ernad::Presort::Chopa,
##   'r' an Ernad::Presort::Ranfi,
##   'v' an Ernad::Presort::Vemli, with its motto set to 'train'.
## All three are constructed from the erimp held in $self->{'e'}.
## Returns whatever the motto() call returns.
sub setup {
    my $self = shift;
    $self->{'h'} = Ernad::Presort::Chopa->new({'e' => $self->{'e'}});
    $self->{'r'} = Ernad::Presort::Ranfi->new({'e' => $self->{'e'}});
    $self->{'v'} = Ernad::Presort::Vemli->new({'e' => $self->{'e'}});
    $self->{'v'}->motto('train');
}

## callable external
##
## Writes the training file "<model dir>/<shoti>_<ranid>.train" and
## returns its path. The path is also stored in $self->{'file'}->{'train'}.
## An existing non-empty file is reused without being rebuilt.
## Dies (confess) when the file cannot be opened or closed.
sub build_file {
    my $self = shift;
    ## check for setup
    if (not $self->{'h'}) {
        $self->setup();
    }
    $self->setup_for_report();
    my $ranid = $self->{'ranid'};
    my $shoti = $self->{'shoti'};
    my $train_file = $self->{'dir'}->{'model'} . "/$shoti" . "_$ranid.train";
    $self->{'file'}->{'train'} = $train_file;
    ## FixMe: this needs more careful checking if the data is up-to-date
    if (-f $train_file and not -z $train_file) {
        $self->echo(__LINE__, "I use the existing $train_file");
        return $train_file;
    }
    ## Explicit mode argument instead of a "> file" string, and fail
    ## right here rather than at the first print on an undefined handle.
    my $fh = IO::File->new($train_file, '>')
        or confess "I can't open $train_file for writing: $!";
    $self->{'fh'}->{'train'} = $fh;
    ## find the files
    ## NOTE(review): the return value is not used; presumably find()
    ## prepares what next_paper() iterates over -- confirm in Vemli.
    my $files = $self->{'v'}->find($ranid);
    ## next_paper() returns undef when there is nothing left.
    while (defined(my $paper = $self->{'v'}->next_paper())) {
        ## end of a file reached: a defined non-reference is skipped
        next if not ref $paper;
        ## code to deal with paper
        $self->deal_with_paper($paper);
    }
    ## Buffered write errors only show up at close, and a truncated file
    ## would be reused by the shortcut above on the next run.
    $fh->close()
        or confess "I can't close $train_file: $!";
    $self->echo(__LINE__, "I wrote $train_file");
    return $train_file;
}

## Writes one paper to the training file handle $self->{'fh'}->{'train'}.
## The paper is a hash reference; the keys read here are 'i', 'h', 'd'
## and 'u'. Dies (confess) without a paper or when 'i' is false.
## Returns nothing when no status is found for the paper, otherwise the
## result of print.
sub deal_with_paper {
    my $self = shift;
    my $paper = shift // confess "I need a paper here.";
    if (not $paper->{'i'}) {
        confess "My papers needs a date.";
    }
    ## this function will take care of error handling and return '' on error
    ## NOTE(review): the method name reads "handle, issuedate" while the
    ## arguments are passed as 'i' then 'h' -- confirm the order in Chopa.
    my $status = $self->{'h'}->status_by_handle_and_issuedate($paper->{'i'}, $paper->{'h'})
        or return;
    my $fh = $self->{'fh'}->{'train'};
    ## NOTE(review): the separator contains a line break, so the 'u'
    ## value lands on a line of its own starting with "# " -- confirm
    ## that this is intended.
    print {$fh} "$status " . $paper->{'d'} . " \n# " . $paper->{'u'} . "\n";
}

## Prepares the per-report state: creates the model directory
## "<learn dir>/model/<repcode>" if it is missing and stores it in
## $self->{'dir'}->{'model'}, then stores the last ranid and shoti in
## $self->{'ranid'} and $self->{'shoti'}. Needs $self->{'r'}, which
## setup() creates. Dies (confess) if the repcode, the erimp, the ranid
## or the shoti is undefined.
sub setup_for_report {
    my $self = shift;
    my $repcode = $self->get_repcode() // confess "I can't get the repcode.";
    ## list the vemli files.
    my $e = $self->{'e'} // confess "I need an erimp here.";
    my $learn_dir = $e->{'dir'}->{'learn'};
    ## set_dir can't be used here, as we use the report
    my $model_dir = $learn_dir . '/model/' . $repcode;
    if (not -d $model_dir) {
        $self->echo(__LINE__, "I make $model_dir.");
        mkpath($model_dir);
    }
    $self->{'dir'}->{'model'} = $model_dir;
    $self->{'ranid'} = $self->{'r'}->find_last_ranid() // confess "I need a ranid here.";
    $self->{'shoti'} = $self->{'r'}->find_last_shoti() // confess "I need a shoti here.";
}

## Copies the 'svm_train_flags' setting of the ernad configuration to
## $self->{'train_flags'}. Dies (confess) if the setting is missing or
## false.
sub train_flags {
    my $self = shift;
    my $train_flags = $self->{'e'}->{'conf'}->{'svm_train_flags'}
        or confess "I need svm_train_flags in the ernad configuration.";
    $self->{'train_flags'} = $train_flags;
    $self->echo(__LINE__, "train_flags set to $train_flags", 4);
}

1;