package Ernad::Learn::Weights; use strict; use warnings; use Carp qw(cluck longmess shortmess croak confess); ## hash of weighing functions our $weigh; ## my $verbose=4; sub echo { my $line_number=shift; my $text=shift or confess "I need something to echo"; my $verbosity=shift // 0; if(not defined($verbose or ($verbosity < $verbose))) { print "$line_number | $text", "\n"; } } sub is_fit_invalid { my $fit=shift; my $fit_rank=shift or confess "I need a fit_rank here."; my $deleted_fits=shift; # or confess "I need deleted_fits here."; if(not defined($fit_rank->{$fit})) { if(defined($deleted_fits->{$fit})) { &echo(__LINE__,"I had earlier deleted feature '$fit'."); return 0; } &echo(__LINE__,"WARN: I can't find a rank for feature '$fit'"); return 1; } return 0; } $weigh->{'binary'} = sub { my $fit_count=shift; my $fit_rank=shift; my $deleted_fits=shift; my @fits=keys %{$fit_count}; my $liner; ## output string; my $out; foreach my $fit (@fits) { next if &is_fit_invalid($fit,$fit_rank,$deleted_fits); my $rank=$fit_rank->{$fit}; my $count=$fit_count->{$fit} // 0; if(not $count) { &echo(__LINE__,"I can't find a count for feature '$fit' in current_handle."); next; } my $weight=1; $liner->{$rank}=$weight; } foreach my $rank (sort {$a <=> $b} keys %{$liner}) { my $weight=$liner->{$rank}; $out.=" $rank:".$weight; } return $out; } ; $weigh->{'ift'} = sub { my $fit_count=shift; my $fit_rank=shift; my $deleted_fits=shift; my @fits=keys %{$fit_count}; my $liner; ## output string; my $out; foreach my $fit (@fits) { next if &is_fit_invalid($fit,$fit_rank,$deleted_fits); my $rank=$fit_rank->{$fit}; my $count=$fit_count->{$fit} // 0; if(not $count) { &echo(__LINE__,"I can't find a count for feature '$fit' in current_handle"); next; } $liner->{$rank}=1-(1/(1+$count)); } foreach my $rank (sort {$a <=> $b} keys %{$liner}) { my $weight=$liner->{$rank}; $weight=sprintf("%2.3f",$weight); ## economize the 0 before decimal point my $print_weight=substr($weight,1); $out.=" $rank:".$print_weight; } return $out; } ; $weigh->{'square_sum_one'} = sub { my $fit_count=shift; my $fit_rank=shift; my $deleted_fits=shift; my @fits=keys %{$fit_count}; my $total=0; my $liner; ## output string; my $out; foreach my $fit (@fits) { next if &is_fit_invalid($fit,$fit_rank,$deleted_fits); my $rank=$fit_rank->{$fit}; my $count=$fit_count->{$fit} // 0; if(not $count) { &echo(__LINE__,"I can't find a count for feature '$fit' in current_handle"); next; } my $weight=sqrt($count); $liner->{$rank}=$weight; $total+=$weight; } foreach my $rank (sort {$a <=> $b} keys %{$liner}) { my $weight=$liner->{$rank}/$total; $weight=sprintf("%6.4f",$weight); ## economize the 0 before decimal point my $print_weight=substr($weight,1); $out.=" $rank:".$print_weight; } return $out; } ; ## traditional weighing scheme $weigh->{'l2'} = sub { my $fit_count=shift; my $fit_rank=shift; my $deleted_fits=shift; my @fits=keys %{$fit_count}; my $total=0; my $liner; ## output string; my $out; foreach my $fit (@fits) { next if &is_fit_invalid($fit,$fit_rank,$deleted_fits); my $rank=$fit_rank->{$fit}; my $count=$fit_count->{$fit} // 0; if(not $count) { &echo(__LINE__,"I can't find a count for feature '$fit' in current_handle"); next; } my $weight=sqrt($count); $liner->{$rank}=sqrt($weight); $total+=$count; } $total=sqrt($total); foreach my $rank (sort {$a <=> $b} keys %{$liner}) { my $weight=$liner->{$rank}/$total; $weight=sprintf("%6.4f",$weight); ## economize the 0 before decimal point #my $print_weight=substr($weight,1); $out.=" $rank:".$weight; } return $out; } ;