package Ernad::FullTextFetch;

## not strictly a part of Ernad

use strict;
use warnings;

use Carp qw(confess);
use File::Basename;
use File::Find;

## File-name endings (matched after a literal dot, at the very end of the
## name) that disqualify a stored file from being offered as full text.
my @bads = (
    'bod',         'cit',          'full',     'hed',
    'raw',         'txt',          'cite',     'abs',
    'body',        'utf8~',        'pdf.body', 'pdf.abs',
    'pdf.cite',    'pdf.txt.xml',  'pdf.txt.body', 'xml',
    'pdf.txt.utf8', 'txt.xml',     'pdf.txt~', 'txt.utf8',
    'txt-utf8',    'txt.body',
);

## A single pattern covering every entry of @bads, built once instead of
## compiling one regex per ending for every file visited.
my $bad_ext_re = do {
    my $alternatives = join '|', map { quotemeta } @bads;
    qr/\.(?:$alternatives)$/;
};

## Decide whether a path found under a paper directory should be reported.
##
## Takes:   a path that is valid from the current working directory.
## Returns: 1 if the path is usable, 0 if it is a directory, ends in one
##          of the @bads endings, or has a base name starting with
##          "references".
sub _is_fulltext_candidate {
    my ($file) = @_;

    return 0 if -d $file;

    ## Any listed ending disqualifies the file. (An earlier variant, left
    ## commented out in the original, rejected it only when the file
    ## without that ending also existed.)
    return 0 if $file =~ $bad_ext_re;

    return 0 if basename($file) =~ m{^references};

    return 1;
}

## a hash of full-text fetch functions
our $fff;

## $fff->{'nep'}->($handle, $dir)
##
## Lists the stored files for a handle of the form repec:A:S:N (the prefix
## is matched case-insensitively; the three parts are presumably archive,
## series and paper number). The files are looked up under $dir/A/S/N,
## recursively.
##
## Takes:   $handle - the handle string
##          $dir    - root directory of the store; confess()es if undefined
## Returns: on success, a reference to a sorted array of file names
##          relative to $dir; otherwise a plain string describing the
##          problem ("bad handle ...", "no directory ...",
##          "no suitable files in ...").
##          Callers tell the two cases apart with ref().
$fff->{'nep'} = sub {
    my $handle = shift // '';    # undef is reported as a bad handle, not warned about
    my $dir    = shift // confess "no directory set";

    ## Number of leading characters ("$dir/") to strip from found names.
    my $cut_length = length($dir) + 1;

    if ($handle !~ m{repec:([^:]+):([^:]+):(.*)}i) {
        return "bad handle $handle";
    }
    my ($archive, $series, $number) = ($1, $2, $3);

    ## JMBC's magic line
    $number =~ s{[/&|;()]}{_}g;

    ## A ".." path segment would make the lookup leave $dir, so such
    ## handles are refused. $number holds no "/" any more at this point.
    for my $segment (split(m{/}, "$archive/$series"), $number) {
        return "bad handle $handle" if $segment eq '..';
    }

    my $guess_dir = join '/', $dir, $archive, $series, $number;
    if (not -d $guess_dir) {
        return "no directory $guess_dir";
    }

    my @pdfs;
    my $collect = sub {
        my $name = $File::Find::name;
        if (_is_fulltext_candidate($name)) {
            push @pdfs, substr($name, $cut_length);
        }
    };

    ## no_chdir keeps the working directory fixed during the walk, so the
    ## -d test on $File::Find::name also works when $dir is relative.
    find({ wanted => $collect, no_chdir => 1 }, $guess_dir);

    if (not @pdfs) {
        return "no suitable files in $guess_dir";
    }

    ## Sorted so the result does not depend on directory read order.
    return [ sort @pdfs ];
};

1;