package Ernad::Enesei;

## Split an AMF file into one small AMF document per collection, and
## write such documents to disk.
##
##   file($in_file)             -- returns { repcode => XML::LibXML document }
##   canonical_string($string)  -- textual clean-up of a serialised collection
##   save($string, $out_file)   -- writes a string to a file as UTF-8

use strict;
use warnings;

use base 'Ernad';

use Carp qw(confess);
use Data::Dumper;
use File::Basename;
use XML::LibXML;

use Ernad::Constant;
use Ernad::Store;

#my $file = $ARGV[0] // '';
#if(not -f $file) {
#  confess "I need a file here.";
#}
### for NEP, give
#my $de_indent = $ARGV[1] // '';
#if(not $de_indent=~m|^\d+$|) {
#  confess "I need a de_indent to cut here.";
#}
#if($file=~m|nep|) {
#  $de_indent=6;
#}
#if($file=~m|bims|) {
#  $de_indent=6;
#}

## XPath context, taken from $main::e on the first call to file() and
## then reused by later calls.
my $xpc;

## file($in_file)
##
## Parses $in_file, walks its amf:collection elements and wraps each one
## that is kept in a fresh <amf> element, which is serialised, passed
## through canonical_string() and parsed again.
##
## Reads two globals of the calling script: $main::e (must be defined, its
## 'xpc' entry is used for XPath queries) and $main::impna (must be true).
##
## Returns a hash reference mapping each repcode to the parsed
## XML::LibXML document of that collection.
##
## Confesses when the file is missing, when $main::e is undefined, when a
## collection has neither an ernad repcode nor an amf homepage, when no
## repcode can be worked out, or when the repcode fails the pattern check.
## Dies when a namespace constant or $main::impna is missing.
sub file {
  my $in_file=shift;
  #my $date=shift // '';
  #if($date and not &Ernad::Date::is($date)) {
  #  confess "I don't like your date $date";
  #}
  if(not -f $in_file) {
    confess "I don't see your file $in_file";
  }
  my $amf_ns = $Ernad::Constant::c->{'amf_ns'} // die;
  my $ernad_ns = $Ernad::Constant::c->{'ernad_ns'} // die;
  my $dom = XML::LibXML->load_xml(location => $in_file);
  my @col_eles=$dom->documentElement->getElementsByTagNameNS($amf_ns,'collection')->get_nodelist();
  my $out={};
  my $e=$main::e // confess "I don't see an erimp";
  if(not $xpc) {
    ## presumably an XML::LibXML::XPathContext with the 'amf' prefix
    ## registered -- verify against the caller.
    $xpc=$e->{'xpc'};
  }
  ## NOTE(review): the walk starts at index 1, so the first collection in
  ## the file is never processed. This looks deliberate (an enclosing
  ## collection?) but cannot be confirmed from this file.
  my $count=1;
  my $col_ele;
  while($col_ele=$col_eles[$count++]) {
    my $amf_ele=$dom->createElementNS($amf_ns,'amf');
    my $repcode;
    my $prefix;
    ## Skip a collection that has been replaced. The path has to be
    ## relative ('.//'): a bare '//' is evaluated from the document root,
    ## so one replaced collection anywhere would skip every collection.
    if($xpc->find('.//amf:isreplacedby',$col_ele)) {
      next;
    }
    my $repcode_ele=$col_ele->getElementsByTagNameNS($ernad_ns,'repcode')->[0];
    if($repcode_ele) {
      $repcode=$repcode_ele->textContent;
      $prefix=$repcode_ele->prefix();
    }
    else {
      ## No explicit repcode: fall back on the last path segment of the
      ## homepage.
      my $hp_ele=$col_ele->getElementsByTagNameNS($amf_ns,'homepage')->[0];
      if(not $hp_ele) {
        confess "i don't see the repcode in ".$col_ele;
      }
      my $hp=$hp_ele->textContent;
      ## Only read $1 after a successful match, otherwise a capture left
      ## over from an earlier match could be taken for the repcode.
      if($hp=~m|/([^/]+)$|) {
        $repcode=$1;
      }
      $prefix='ernad';
    }
    if(not $repcode) {
      confess "I have no repcode.";
    }
    ## The value is not used here, but it has to be set.
    $main::impna or die;
    ## NOTE(review): this accepts only repcodes containing 'nep-', which
    ## would reject 'bims-' repcodes although bims files are handled
    ## further down -- confirm what is intended.
    if(not $repcode=~m|nep-\S+$|) {
      confess "bad repcode '$repcode'";
    }
    $col_ele->setAttribute('xmlns',$amf_ns);
    $col_ele->setAttribute('xmlns:'.$prefix,$ernad_ns);
    ## appendChild moves the collection out of the source document.
    $amf_ele->appendChild($col_ele);
    my $string=$amf_ele->toString;
    $string=canonical_string($string);
    if($in_file=~m|nep|) {
      ## NOTE(review): the pattern is empty as it stands; it looks as if
      ## markup between the delimiters has been lost. An empty pattern
      ## makes perl reuse the last successfully matched pattern. Restore
      ## from version control.
      if($string=~m||) {
        next;
      }
    }
    elsif($in_file=~m|bims|) {
      ## NOTE(review): as it stands the first substitution replaces a
      ## string with itself; runs of spaces have probably been collapsed.
      $string=~s|\n <|\n <|g;
      $string=~s|\s*||;
    }
    else {
      #die "I have a problem with your file $in_file";
    }
    ## Named apart from $dom, which is still the source document.
    my $col_dom = XML::LibXML->load_xml({string => $string});
    if(not $col_dom) {
      confess "I can't parse $string";
    }
    $out->{$repcode}=$col_dom;
  }
  return $out;
}

## canonical_string($string)
##
## Applies a fixed sequence of textual substitutions to a serialised
## collection and returns the result: it cuts six characters of
## indentation after each newline, drops a maintainer email line and
## mailing list URLs, shortens the 'ernad' namespace prefix to 'e', turns
## tabs into spaces and removes whitespace-only lines.
##
## The order of the substitutions matters, so they are kept as they were.
sub canonical_string {
  my $string=shift;
  my $de_indent=6;
  $string=~s|\n\s{$de_indent}|\n|g;
  $string=~s|krichel\@openlib\.org\n||;
  ## for NEP
  ## NOTE(review): several patterns below are empty or nearly empty as
  ## they stand; it looks as if markup between the delimiters has been
  ## lost. An empty pattern makes perl reuse the last successfully
  ## matched pattern. Restore from version control.
  $string=~s|||;
  $string=~s|||;
  $string=~s|.*\s+||;
  $string=~s|\s+http://lists\.repec\.org/mailman/listinfo/nep-...||;
  $string=~s|\s+http://lists\.biomed\.news/listinfo/bims-[^<]+||;
  $string=~s|\s+||;
  $string=~s|\s*||;
  #$string=~s|\s*(xmlns:ernad="http://ernad\.openlib\.org")|\n $1|;
  ## put the id attribute on a line of its own
  $string=~s|\s+(id="[^"]+">)|\n $1|;
  ## shorten the namespace prefix, in tags and in its declaration
  $string=~s|<(/?)ernad:|<$1e:|g;
  $string=~s|xmlns:ernad|xmlns:e|;
  ## remove blank lines.
  $string=~s|\t| |g;
  $string=~s|\n[ \t]+\n|\n|g;
  return $string;
}

## save($string, $out_file)
##
## Writes $string, followed by a newline, to $out_file through the :utf8
## layer, then reports the file name on standard output.
##
## Confesses when the file can not be opened, written or closed, so that
## the report is only printed when the file has really been written.
sub save {
  my $string=shift;
  my $out_file=shift;
  #my @lines=split("\n",$string);
  #print Dumper @lines;
  #my $out_file=$repcode.'_'.$date.'.amf.xml';
  ## Three-argument open: the file name is never parsed for a mode, and
  ## no extra module has to be loaded for the handle.
  open(my $fh, '>:utf8', $out_file)
    or confess "I can't open $out_file for writing: $!";
  print {$fh} "$string\n"
    or confess "I can't write to $out_file: $!";
  ## Buffered write errors only show up here.
  close($fh)
    or confess "I can't close $out_file: $!";
  print "I write $out_file\n";
}

1;