package Ernad::Enesei;
use strict;
use warnings;
use base 'Ernad';
use Carp qw(confess);
use Data::Dumper;
use File::Basename;
use XML::LibXML;
use Ernad::Constant;
use Ernad::Store;
#my $file = $ARGV[0] // '';
#if(not -f $file) {
# confess "I need a file here.";
#}
### for NEP, give
#my $de_indent = $ARGV[1] // '';
#if(not $de_indent=~m|^\d+$|) {
# confess "I need a de_indent to cut here.";
#}
#if($file=~m|nep|) {
# $de_indent=6;
#}
#if($file=~m|bims|) {
# $de_indent=6;
#}
# XPath context shared by every call to file(). It is copied from
# $main::e->{'xpc'} the first time file() runs and reused afterwards.
my $xpc;

# file($in_file)
#
# Split an AMF file into one standalone document per collection element.
#
# Parameters:
#   $in_file - path of the XML file to read; it must be an existing file.
#
# Returns a hash reference that maps each repcode to an XML::LibXML document
# holding that collection wrapped in a fresh <amf> element. A later
# collection with the same repcode replaces an earlier one.
#
# Reads $main::e (for its 'xpc' entry), $main::impna and the 'amf_ns' and
# 'ernad_ns' entries of $Ernad::Constant::c.
#
# Dies when the input file is missing, when one of the values above is not
# set, when a collection has neither a repcode element nor a homepage, when
# the repcode is empty or does not match nep-\S+$, or when the cleaned-up
# string can not be parsed.
sub file {
    my $in_file = shift;
    if (not -f $in_file) {
        confess "I don't see your file $in_file";
    }
    my $amf_ns   = $Ernad::Constant::c->{'amf_ns'}   // die;
    my $ernad_ns = $Ernad::Constant::c->{'ernad_ns'} // die;
    my $dom      = XML::LibXML->load_xml(location => $in_file);
    my @col_eles = $dom->documentElement
                       ->getElementsByTagNameNS($amf_ns, 'collection')
                       ->get_nodelist();
    my $out = {};
    my $e = $main::e // confess "I don't see an erimp";
    $xpc ||= $e->{'xpc'};

    # NOTE(review): the first collection (index 0) is never processed, exactly
    # as before. Presumably it is an enclosing parent collection -- confirm.
    for my $col_ele (@col_eles[1 .. $#col_eles]) {
        # Skip a collection that carries an isreplacedby element. The path
        # is relative (".//"): an absolute "//" is evaluated from the
        # document root and so would test the whole file, not this
        # collection.
        if ($xpc->find('.//amf:isreplacedby', $col_ele)) {
            next;
        }

        my $repcode;
        my $prefix;
        my $repcode_ele
            = $col_ele->getElementsByTagNameNS($ernad_ns, 'repcode')->[0];
        if ($repcode_ele) {
            $repcode = $repcode_ele->textContent;
            $prefix  = $repcode_ele->prefix();
        }
        else {
            # No repcode element: fall back to the last path segment of the
            # homepage.
            my $hp_ele
                = $col_ele->getElementsByTagNameNS($amf_ns, 'homepage')->[0];
            if (not $hp_ele) {
                confess "i don't see the repcode in " . $col_ele;
            }
            # Capture in list context so that a failed match leaves the
            # repcode undefined instead of picking up a stale $1.
            my ($last_segment) = $hp_ele->textContent =~ m|/([^/]+)$|;
            $repcode = $last_segment;
            $prefix  = 'ernad';
        }
        if (not $repcode) {
            confess "I have no repcode.";
        }
        # The value itself is not used here; it only has to be set.
        $main::impna or die;
        if (not $repcode =~ m|nep-\S+$|) {
            confess "bad repcode '$repcode'";
        }

        # Move the collection out of the input document into its own <amf>
        # wrapper, with both namespaces declared on the collection itself.
        my $amf_ele = $dom->createElementNS($amf_ns, 'amf');
        $col_ele->setAttribute('xmlns', $amf_ns);
        $col_ele->setAttribute('xmlns:' . $prefix, $ernad_ns);
        $amf_ele->appendChild($col_ele);
        my $string = canonical_string($amf_ele->toString);

        if ($in_file =~ m|nep|) {
            # NOTE(review): the pattern below is empty. Perl then reuses the
            # last successfully matched pattern, which as far as I can tell
            # is the "nep" test just above. Presumably the real pattern was
            # lost -- TODO confirm against the repository copy.
            if ($string =~ m||) {
                next;
            }
        }
        elsif ($in_file =~ m|bims|) {
            # NOTE(review): as written the first substitution replaces text
            # with itself and the second strips leading whitespace; both look
            # like they have lost part of their pattern -- TODO confirm.
            $string =~ s|\n <|\n <|g;
            $string =~ s|\s*||;
        }
        else {
            #die "I have a problem with your file $in_file";
        }

        # load_xml throws on malformed input rather than returning false, so
        # trap the exception to report the offending string.
        my $col_dom = eval { XML::LibXML->load_xml({string => $string}) };
        if (not $col_dom) {
            confess "I can't parse $string: $@";
        }
        $out->{$repcode} = $col_dom;
    }
    return $out;
}
# canonical_string($string)
#
# Run a fixed sequence of regex substitutions over a copy of $string and
# return the result. The caller's variable is not modified.
#
# NOTE(review): two substitutions below have a completely empty pattern and
# several others consist of whitespace only. They look like patterns that
# have lost literal markup -- TODO confirm against the repository copy.
# With an empty pattern Perl reuses the last successfully matched regex
# (see perlop), so the result depends on the exact statement order here.
sub canonical_string {
my $string=shift;
# Number of whitespace characters removed after every newline.
my $de_indent=6;
$string=~s|\n\s{$de_indent}|\n|g;
# Drop the first "krichel@openlib.org" that is followed by a newline.
$string=~s|krichel\@openlib\.org\n||;
## for NEP
# NOTE(review): empty patterns, see the header comment.
$string=~s|||;
$string=~s|||;
# NOTE(review): as written this deletes the leftmost match of "any run of
# characters followed by whitespace" -- presumably incomplete, confirm.
$string=~s|.*\s+||;
# Drop the first NEP list-info URL (the three characters after "nep-" are
# arbitrary) and the first bims list-info URL (up to the next "<"),
# together with the whitespace in front of each.
$string=~s|\s+http://lists\.repec\.org/mailman/listinfo/nep-...||;
$string=~s|\s+http://lists\.biomed\.news/listinfo/bims-[^<]+||;
# NOTE(review): as written these remove the first run of whitespace and
# then any whitespace at the very start -- presumably incomplete, confirm.
$string=~s|\s+||;
$string=~s|\s*||;
#$string=~s|\s*(xmlns:ernad="http://ernad\.openlib\.org")|\n $1|;
# Put the first id="...") attribute that closes a tag on a line of its own.
$string=~s|\s+(id="[^"]+">)|\n $1|;
# Shorten the ernad: prefix to e: in every opening and closing tag, and in
# the first namespace declaration.
$string=~s|<(/?)ernad:|<$1e:|g;
$string=~s|xmlns:ernad|xmlns:e|;
## remove blank lines.
# Tabs become single spaces first, then lines holding only spaces or tabs
# are dropped.
$string=~s|\t| |g;
$string=~s|\n[ \t]+\n|\n|g;
return $string;
}
# save($string, $out_file)
#
# Write $string followed by a newline to $out_file through a UTF-8 output
# layer, replacing any previous content, then report the file name on
# STDOUT.
#
# Parameters:
#   $string   - text to write.
#   $out_file - path of the file to create or truncate.
#
# Dies via confess when the file can not be opened, written or closed, so
# the "I write ..." message is only printed after a successful write.
sub save {
    my $string   = shift;
    my $out_file = shift;
    #my @lines=split("\n",$string);
    #print Dumper @lines;
    #my $out_file=$repcode.'_'.$date.'.amf.xml';

    # Three-argument open takes the file name literally; the two-argument
    # form would interpret mode characters and trim whitespace in the name.
    open(my $fh, '>:utf8', $out_file)
        or confess "I can't open $out_file for writing: $!";
    print {$fh} "$string\n"
        or confess "I can't write to $out_file: $!";
    # Buffered write errors only surface when the handle is closed.
    close($fh)
        or confess "I can't close $out_file: $!";
    print "I write $out_file\n";
}
1;