package Ernad::Delivery; use base 'CGI::Application'; use strict; use warnings; use Carp qw(confess); use Data::Dumper; use CGI::Application::Plugin::Redirect; use URI::Escape; use File::Slurp; use XML::LibXML; use Ernad::Erimp; use Ernad::FullTextFetch; ## variable is used, leave it here my $testing=0; my $log_file="/tmp/delivery_test.log"; my $redirect_phrase='302 Found'; my $delay_phrase='429 Too Many Requests'; ## time limiting structure my $t; my $clear_time=3600; ## setup() can even be skipped for common cases. See CGI::Application doc sub setup { my $self = shift; $self->start_mode('mode1'); $self->run_modes('mode1' => 'show'); } ## shows response sub show { my $self = shift; ## Get CGI query object my $q = $self->query(); my $ip=$q->remote_addr(); my $enforced_delay=$main::e->{'conf'}->{'delivery_429_delay'} // 0; ## age of the sturcture we have if($enforced_delay) { my $now=time; if(not $t->{'age'}) { $t->{'age'}=$now; } elsif($t->{'age'}) { ## clear the old addresses from memory if($now - $t->{'age'} > $clear_time) { foreach my $ip (keys %{$t->{'ip'}}) { if($now - $t->{'ip'}->{$ip} > $enforced_delay) { delete $t->{'ip'}->{$ip}; } } } $t->{'age'}=$now; } #open(T,">> $log_file"); #print T "$ip\n"; #print T Dumper $t; #close T; if(not $t->{'ip'}->{$ip}) { $t->{'ip'}->{$ip}=$now; } elsif($now - $t->{'ip'}->{$ip} < $enforced_delay) { $self->header_props( '-status' => '429 Too Many Requests', '-type' => 'text/html; charset=utf-8', '-retry-after' => $enforced_delay ); $t->{'ip'}->{$ip}=$now; my $error_doc=$self->error_doc({'code'=>'error_429', 'client'=>$ip}); return $error_doc; } $t->{'ip'}->{$ip}=$now; } ## the f parameter for a location of a pafis file my $f_param = $q->param('f') // ''; if($f_param) { # if($testing) { # open(L,">> $log_file"); # print L "I do an f delivery for $f_param\n"; # close L; # } my $out=$self->deliver_pafi($f_param); if($out) { return $out; } } ## the s parameter for a single file in many file system my $s_param = $q->param('s') // ''; if($s_param) { ## if this is a legitimate url if($main::f->{'#'}->{$s_param}) { return $self->redirect($s_param, $redirect_phrase); } ## fixme: this should be parameterized. return $self->redirect('http://nep.repec.org/', $redirect_phrase); } # if($testing) { # open(L,">> $log_file"); # print L "s param is $s_param\n"; # close L; # } my $u_param = $q->param('u') // ''; if(not $u_param) { my $error_doc=$self->error_doc({'code' => 'no_parameter_u'}); return $error_doc; } #my $handle = uri_unescape($u_param) // ''; my $handle=$u_param; if(not $handle) { my $error_doc=$self->error_doc({'code'=>'no_u_decode', 'u_param'=>$u_param}); return $error_doc; } # if($testing) { # open(L,">> $log_file"); # print L "u param is $u_param\n"; # close L; # } ## try to find this file in the store my $try_pafis=&{$Ernad::FullTextFetch::fff->{'nep'}}($handle,$main::pafis_dir); my $print_ft=$try_pafis; my $i_have_pafis=0; # if($testing) { # open(L,">> $log_file"); # print L "$try_pafis\n"; # print L "try_pafis is $print_ft\n"; # close L; # } if(ref $try_pafis) { $i_have_pafis=1; # $print_ft=Dumper($try_pafis)."\n"; ## if there is only one full text if(scalar(@{$try_pafis} == 1)) { ## deliver it my $out=$self->deliver_pafi($try_pafis->[0]); if($out) { return $out; } } } my $futli=$main::f->{$handle} // ''; if(not $futli and not $i_have_pafis) { my $error_doc=$self->error_doc({'code'=>'no_url', 'handle'=>$handle}); return $error_doc; } ## a legacy difference between direct and indirect futlis my @direct_futlis = @{$futli->[0]}; my @indirect_futlis = @{$futli->[1]}; ## futlis or pafi my $futs={}; $futs->{'li'}->{'direct'}=$futli->[0]; $futs->{'li'}->{'indirect'}=$futli->[1]; if($i_have_pafis) { $futs->{'pafis'}=$try_pafis; } my $total_futlis= scalar(@direct_futlis) + scalar(@indirect_futlis); if( $total_futlis == 1 and not $i_have_pafis) { ## single link my $sl; ## the single futli if( scalar( @direct_futlis ) == 1 ) { $sl = $direct_futlis[0]; } else { $sl = $indirect_futlis[0]; } return $self->redirect($sl, $redirect_phrase); } ## multi futli case my $nucleus_doc = XML::LibXML::Document->new('1.0','utf-8'); my $ernad_ns = $main::e->{'const'}->{'ernad_ns'} or die; my $futli_ele = $nucleus_doc->createElementNS($ernad_ns,'futli'); $futli_ele->setAttribute('handle',$handle); foreach my $url ( @{$futs->{'li'}->{'direct'}}) { $futli_ele->addChild( &build_url($url, 'direct', $nucleus_doc ) ); } ## all indirect futlis foreach my $url ( @indirect_futlis ) { $futli_ele->addChild( &build_url($url, 'indirect', $nucleus_doc ) ); } ## all pdf foreach my $pafis_file ( @{$futs->{'pafis'}} ) { $futli_ele->addChild( &build_pafis($pafis_file, $nucleus_doc ) ); } $nucleus_doc->addChild( $futli_ele ); # if($testing) { # $nucleus_doc->toFile("/tmp/nep_delivery.xml"); # } my $html_doc=$main::e->{'t'}->transform($nucleus_doc,'delivery_html'); $self->header_add(-type => 'text/html', -charset => 'utf-8'); my $out=$html_doc->toString; return $out; } sub deliver_pafi { my $self=shift; my $loc=shift; my $pafis_dir=$main::pafis_dir; my $pafi_file="$main::pafis_dir/$loc"; if(not -f $pafi_file or -z $pafi_file) { return 0; } $self->header_add(-type => 'application/pdf'); my $contents=&File::Slurp::read_file($pafi_file); return $contents; } sub build_url { my $url = shift; my $direction = shift; my $doc = shift; ## creates url node ## first arg - target, second arg - direction my $ernad_ns=$main::e->{'const'}->{'ernad_ns'} or die; ## building elements my $url_ele = $doc->createElementNS( $ernad_ns, 'url'); $url_ele->setAttribute('target',$url); $url_ele->setAttribute('transporter','s'); $url_ele->setAttribute('direction',$direction); ## legit urls, avoid http://d.repec.org/n?s=http://cclone.tk/starwars.php ## as reported by CZ on 2016-01-01 $main::f->{'#'}->{$url}=1; ## also add the unescaped URL my $url_ue=uri_unescape($url); $main::f->{'#'}->{$url_ue}=1; return $url_ele; } sub build_pafis { my $pafis_file = shift; my $doc = shift; my $ernad_ns=$main::e->{'const'}->{'ernad_ns'} or die; #my $pafis_dir=$main::pavis_dir; ## building elements my $pafi_ele=$doc->createElementNS($ernad_ns, 'pafi' ); $pafi_ele->setAttribute('target',$pafis_file); $pafi_ele->setAttribute('transporter','f'); return $pafi_ele; } sub error_doc { my $self=shift; my $in=shift; my $nucleus_doc = XML::LibXML::Document->new('1.0', 'utf-8'); my $ernad_ns=$main::e->{'const'}->{'ernad_ns'} or die; my $error_node=$nucleus_doc->createElementNS($ernad_ns,'error'); foreach my $key (keys %$in) { my $value=$in->{$key}; $error_node->setAttribute($key,$value); } $nucleus_doc->setDocumentElement($error_node); # if($testing) { # $nucleus_doc->toFile("/tmp/e.xml"); # } my $html_doc=$main::e->{'t'}->transform($nucleus_doc,'delivery_error_page'); #my $doc=decode('utf8',$html_doc->toString); my $doc=$html_doc->toString; ## if the page sends a non-200 status, the code starts with errors. ## in that case we don't add a header. #if(substr($in->{'code'},0,6) eq 'error_') { # return $doc; #} $self->header_add(-type => 'application/xhtml+xml', -charset => 'utf-8'); return $doc; } 1;