package Ernad::Presort::Fidek::Frex; use strict; use warnings; use base ('Ernad::Presort::Fidek'); use Carp qw(confess); use Data::Dumper; use Ernad::Presort::Frexe; # use Ernad::Store; binmode(STDOUT,':utf8'); ## main active call sub add_fits { my $k=shift; ## calling object. FixMe: I should have another name here. my $d=shift; my $line=shift // confess "I need a line here."; my $papid=shift // confess "I need a papid here."; # $k->{'fitcla'}='frex'; chomp $line; ## frases only use lowercase $line=lc($line); my $e=$k->{'e'} // $main::e // confess "I can't see your erimp"; ## skip if this is not configured if(not $e->{'conf'}->{'frast'}) { return 0; } $e->echo(__LINE__,"LINE '$line'",10);; ## load the frexe here if it it's not there. if(not $k->{'x'}) { my $a=Ernad::Presort::Frexe->new({'e'=>$e}); $a->get(); $k->{'x'}=$a->get() // confess "I can't get the frexe."; } ## the frexe my $x=$k->{'x'}; ## start position my @terms=split(/\s+/,&treat_line($line)); my $total_terms=$#terms; my $count_pos=0; ## frexe inspector ## we don't need to reach the last term my $frase; while($count_pos<$total_terms) { my $term=$terms[$count_pos]; $e->echo(__LINE__,"\nstart term is '$term'",10); my $frase=$term; my $count_for=0; my $ix=$x; $e->echo(__LINE__,"I enter while",10); while($ix=$ix->{$term}) { my $next_term=$terms[$count_pos+$count_for+1]; if(not defined($next_term)) { $e->echo(__LINE__,"I reached the end of the line",10); last; } $frase.=' '.$next_term; $e->echo(__LINE__,"potential phrase '$frase'",10); if($ix->{$next_term}) { ## FixMe: this if introduced 2019-01-01 should be gone if(ref($ix->{$next_term})) { if($ix->{$next_term}->{'_'} or ! keys %{$ix->{$next_term}}) { $e->echo(__LINE__,"'$frase' in $papid",10); $d->add_fit($frase,$papid,$count_for+1); } else { $e->echo(__LINE__,"frase '$frase' not terminal.",10); } } else { warn "bad structure at $next_term" . Dumper $ix->{$next_term}; delete $ix->{$next_term}; } } else { $e->echo(__LINE__,"'$frase' is not registered",10); } if($count_for > 1) { $e->echo(__LINE__,"|$term|",10); my $dix=Dumper $ix; $e->echo(__LINE__,$dix,10); } $term=$next_term; $count_for++; } my $dix=Dumper $ix; $e->echo(__LINE__,"I have nothing for $term in $dix",10); $count_pos++; } } sub treat_line { my $line=shift; $line=lc($line); $line=~s|\s+| |g; ## \p{punct} would include the ')'; $line=~s/[;.,]+(\s|$)/ /g; return $line; } 1; __END__;