#!/usr/bin/perl

use strict;
use warnings;
use utf8;

use Data::Dumper;
use File::Basename;
use File::Copy;
use File::Path;
use Getopt::Std;
use POSIX qw( strftime );
use Storable;
use XML::LibXML;

# use Ernad::Chungju;
use Ernad::Common;
use Ernad::Dates;
#use Ernad::Checking;
use Ernad::Erimp;
use Ernad::Generate;
use Ernad::Pile;
use Ernad::Layer;
use Krichel::File;
#use Ernad::Presorting;

## options
## -h has to be part of the getopts specification, otherwise getopts
## rejects it as unknown and the help branch below is never reached.
## -s is still accepted, so that existing invocations keep working.
my %o;
getopts("hovxFd:s", \%o)
  or die "I could not parse the options, use -h for help.\n";

if($o{'h'}) {
  usage();
  exit;
}


## list of DB elements (apparently not referenced anywhere else in this script)
my @elements;

## everything printed to STDOUT goes through the utf8 layer
binmode(STDOUT, ":utf8:");

## the working date is the current day, unless -d YYYY-MM-DD overrides it
my $today = Ernad::Dates::today();
if (my $given_date = $o{'d'}) {
  unless ($given_date =~ m|^\d{4}-\d{2}-\d{2}$|) {
    print "I can't work with your -d $given_date\n";
    exit;
  }
  $today = $given_date;
}

## the issue date always follows the working date
my $issue_date = $today;

## a variable that may be set in the options; empty means "take the
## directory from the configuration"
my $directory_to_watch = '';

## the impna is the first, and mandatory, non-option argument
my $impna = defined $ARGV[0] ? $ARGV[0] : '';
unless ($impna) {
  print "I don't have an impna parameter\n";
  usage();
  exit;
}

## the erimp object carries the configuration ('conf'), the constants
## ('const'), the directories ('dir') and the XML helpers ('x', 't')
our $e=Ernad::Erimp->new({'impna'=>$impna});

## The piles directory is used right below to place the output file, so
## refuse to continue before it is used, not after.
my $piles_dir = $e->{'dir'}->{'pile'} // die "I need a piles dir defined.";

my $amf_ns=$e->{'const'}->{'amf_ns'};
## the xpath context, used by UpdateFutli
my $xpc = $e->{'x'}->{'xpc'};
my $file_list_tmp_file = $e->{'dir'}->{'tmp'}.'/'.$e->{'const'}->{'amf_file_list'};

## the pile of the day is written to <piles dir>/new/<date>.xml
my $pile_doc  = XML::LibXML::Document->new('1.0','utf-8');
my $pile_file = $piles_dir.'/new/'."$today.xml";
## NOTE(review): presumably this makes sure the parent directory of the
## pile file exists -- confirm against Krichel::File
Krichel::File::prepare($pile_file);

## never clobber an existing pile unless -o says so
if(not $o{'o'} and -f $pile_file) {
  print "I found $pile_file, use -o to overwrite.\n";
  exit;
}

my $db_dir = $e->{'dir'}->{'db'};
my $db_backup_dir = $e->{'dir'}->{'db_backup'};
my $date_base_file = $db_dir.'/'.$e->{'const'}->{'date_base_file'};
my $type_restrict = $e->{'conf'}->{'type_restrict'} // '';
my $date_required = $e->{'conf'}->{'date_required'} // '';
my $pile_root_element = $e->{'x'}->amf_element();
my $allport_repcode = $e->get_allport_repcode();
my $allport_collection = $e->get_allport_collection();

## number of papers that made it into the pile
my $count_papers_included=0;

## the collection element, stamped with the issue date, is the first
## child of the pile root
$allport_collection=Ernad::Generate::inject_issue_date($allport_collection,$issue_date);
$pile_root_element->appendChild($allport_collection);

my $futli_base_file = $db_dir.'/'.$e->{'const'}->{'futli_base_file'};
my $do_futli_base=$e->{'conf'}->{'do_futli_base'} // '';

## option F -- don't recompile the futli checking base, load the stored
## one from the db directory instead

my $futli_checking_base;
if(not $o{'F'}) {
  $futli_checking_base=Ernad::Pile::get_futli_checking_base($o{'o'});
}
else {
  my $futli_checking_base_file = $db_dir.'/'.$e->{'const'}->{'futli_checking_base_file'};
  $futli_checking_base=Krichel::File::load($futli_checking_base_file);
}

## backup
## The issue date becomes a directory name, so check it before it is
## used to build the path.
if(not $issue_date=~m|^\d{4}-\d{2}-\d{2}$|) {
  die "I don't like the issue_date $issue_date";
}
my $current_backup_dir = $db_backup_dir.'/'.$issue_date;

## one backup per issue date: an existing directory is left alone
if(not -d $current_backup_dir) {
  mkpath $current_backup_dir;
  ## do it twice for additional safety
  foreach my $pass (1, 2) {
    ## list form: the paths never pass through a shell
    my $status = system('rsync', '-va', "$db_dir/", $current_backup_dir);
    if($status != 0) {
      ## a failed backup should not go unnoticed, but it does not stop the run
      warn "warning: backup pass $pass from $db_dir to $current_backup_dir failed, status $status\n";
    }
  }
}
else {
  print "The backup_dir $current_backup_dir already exists.\n";
  print "I am not creating another backup for $issue_date.\n";
}

## loading db: the date base maps a paper id to the date the paper was
## put into a pile; it has to exist
print "Retrieving existing date base: $date_base_file\n";
-f $date_base_file
  or die "I can't find the date_base $date_base_file";
my $date_base = Krichel::File::load($date_base_file);

## loading futli_base, only when the configuration asks for one
my $futli_base;
if ($do_futli_base) {
  print "Retrieving existing futli base.\n";
  if (-f $futli_base_file) {
    $futli_base = Krichel::File::load($futli_base_file);
  }
  elsif ($o{'v'}) {
    ## NOTE(review): the script stops here only when -v is given; without
    ## -v a missing futli base is silently tolerated -- confirm intent
    print "I found no futli base.";
    exit;
  }
}


## make sure the directory that receives the temporary file list exists;
## mkpath croaks when it can not create it
my $file_list_tmp_dir = dirname($file_list_tmp_file);
if(not -d $file_list_tmp_dir) {
  mkpath $file_list_tmp_dir;
}

## The file list used to be rebuilt only when it was missing or old. We
## can create it every time now, since we have the fields.
## A directory given on the command line wins over the configured one.
my $papers_dir = $directory_to_watch || $e->{'conf'}->{'directory_to_watch'};
## without a start directory find would be run with no path at all
if(not defined $papers_dir or $papers_dir eq '') {
  die "I don't have a directory to watch, set directory_to_watch in the configuration.";
}
my $find_file_command = "find ".$papers_dir." -name '*.amf.xml' -type f > $file_list_tmp_file";
print "Executing '$find_file_command' to get file list ... ";
my $find_status = system("$find_file_command");
if($find_status != 0) {
  ## find also exits non-zero when only part of the tree is unreadable,
  ## so report it and carry on with whatever was listed
  warn "warning: '$find_file_command' ended with status $find_status\n";
}
print " gathered AMF file list.\n";

print "Progress:\n";
## builds the pile: every amf:text element from the listed files that
## passes all the checks below is appended to $pile_root_element

print "Stage 1:.\n" if( $o{'v'} );

## both restrictions are regular expressions; whatever matches is left out
my $file_name_restrict = $e->{'conf'}->{'PapersFileRestrict'} // '';
my $paper_id_restrict = $e->{'conf'}->{'PapersIdRestrict'} // '';

if(not -f $file_list_tmp_file) {
  die "I don't see the file $file_list_tmp_file";
}

## three-argument open with a lexical handle; without the list there is
## nothing to do, so fail loudly
open(my $amf_file_list_fh, '<', $file_list_tmp_file)
  or die "I can't open $file_list_tmp_file: $!";

my $count_parsed_files=0;
while (my $amf_file = <$amf_file_list_fh>) {
  chomp $amf_file;

  ## is the file name part of the restricted set
  if ( $file_name_restrict ) {
    next if( $amf_file =~ m/$file_name_restrict/ );
  }

  ## check that the file is there before asking for its age
  if(not -f $amf_file) {
    die "I don't see '$amf_file'. It is listed in '$file_list_tmp_file'";
  }
  if($o{'v'}) {
    ## this is if you want to have it very verbose
    my $age=-M $amf_file;
    print "parsing file $amf_file, of age $age\n";
  }

  ## a file that can not be opened is skipped, just like one that can
  ## not be parsed
  my $amf_fh;
  if(not open($amf_fh, '<', $amf_file)) {
    print "I could not open file $amf_file: $!\n";
    next;
  }
  binmode $amf_fh; # drop all PerlIO layers possibly created by a use open pragma
  my $doc;
  ## protect this against a crash
  eval {
    $doc = XML::LibXML->load_xml(IO => $amf_fh);
  };
  ## the document is fully read at this point
  close $amf_fh;
  if (ref($doc) ne 'XML::LibXML::Document') {
    print "I could not parse file $amf_file ... probably the list field file.\n";
    next;
  }
  if($o{'v'}) {
    print "I parsed $amf_file\n";
  }
  ## we look at the amf:text elements of the document; note that
  ## getElementsByTagNameNS returns all descendants of the root, not only
  ## its direct children
  my $amf_element=$doc->documentElement;
  my @text_elements=$amf_element->getElementsByTagNameNS($amf_ns,'text')->get_nodelist;
  $count_parsed_files++;
  foreach my $text_element (@text_elements) {
    ## the paper handle
    my $id=$text_element->getAttribute('id');
    if(not defined($id)) {
      if($o{'v'}) {
        print "skipping a text without id= in $amf_file\n";
      }
      next;
    }
    if($o{'v'}) {
      print "found a text element '$id'\n";
    }
    ## check if a handle restriction is passed.
    if ($paper_id_restrict) {
      if ( $id =~ m/$paper_id_restrict/ ) {
        if($o{'v'}) {
          print "I skip '$id', because it matches '$paper_id_restrict'\n";
        }
        next;
      }
    }
    ## update the futli base. Remember whether the base knew this paper
    ## before this run touched it: the warning further down must not be
    ## triggered by the entry created right here.
    my $had_futli_entry=0;
    if($do_futli_base) {
      if($futli_base->{$id}) {
        $had_futli_entry=1;
      }
      else {
        UpdateFutli( $text_element, $id );
      }
    }
    ## papers with a date in the date_base have been in a pile already
    if($date_base->{$id}) {
      if ($o{'v'}) {
        print "I skip '$id' because it is in the date_base.\n";
      }
      next;
    }
    if($o{'v'}) {
      print "Paper '$id' is not in the date_base, further checks ...\n";
    }
    ## this does its own reporting
    if(not Ernad::Pile::pre_date_base( $text_element, $type_restrict, $o{'v'}, $id) ) {
      next;
    }
    ## if there is no futli checking base directory, it will be undefined;
    ## the checking base is looked up by the lowercased handle
    if($futli_checking_base) {
      if (not exists( $futli_checking_base->{lc($id)} ) ) {
        if($o{'v'}) {
          print "I skip '".lc($id)."', because it didn't pass futli check.\n";
        }
        next;
      }
    }
    if($o{'v'}) {
      print lc($id) ." passes futli check.\n";
    }
    ## this does its own reporting
    if(not Ernad::Pile::post_date_base( $text_element, $id, $o{'v'} ) ) {
      next;
    }
    if ($o{'v'}) {
      print "I put $id into the pile.\n";
    }
    $count_papers_included++;
    ## put it into the date base
    $date_base->{$id}  = $today;
    ## append to the global new papers element
    $pile_root_element->appendChild($text_element);
    ## Futli: a paper that is new to the date base is not expected to
    ## have been in the futli base before this run
    if($had_futli_entry) {
      print "warning: new paper $id has an entry in the futli_base!\n";
    }
    ## (re)compute the entry from the element as it went into the pile
    if($do_futli_base) {
      if($o{'v'}) {
        print "note: I update the futli\n";
      }
      UpdateFutli( $text_element, $id );
    }
  }
}
close($amf_file_list_fh);


## write the bases back, unless -x asks to leave them untouched; the
## verbose messages are only printed when something is really saved
if(not $o{'x'}) {
  if( $o{'v'} ) {
    print "Saving date base.\n";
  }
  Krichel::File::save($date_base_file,$date_base);
  if($do_futli_base) {
    if($o{'v'}) {
      print "Saving futli base.\n";
    }
    Krichel::File::save($futli_base_file,$futli_base);
  }
}


## turn the collected elements into the allport document
$pile_doc->setDocumentElement($pile_root_element);
my $amf_doc=$e->{'t'}->transform($pile_doc,$e->{'const'}->{'make_allport'});

## layer it
my $doc=$amf_doc->cloneNode(1);
## don't do this if the pile will contain no papers because sorting will die
if($count_papers_included) {
  $doc=Ernad::Layer::layer($doc,$impna);
}
## the second argument asks for indented output
$doc->toFile($pile_file,1)
  or die "I could not write the pile to $pile_file";

if($o{'v'}) {
  print "I added $count_papers_included papers\n";
  print "Done\n";
}


exit;



###################################################################################

## Stores the full-text links (futlis) of one text element in the global
## $futli_base, under the key $id. The stored value is a reference to a
## two-element array: first the list of direct URLs (amf:file/amf:url
## below the element itself), then the list of indirect URLs (the same
## path below amf:hasversion/amf:text). An existing entry is replaced.
## With -v every URL found is reported on STDOUT.
##
## Parameters: the 'text' node and its id.
sub UpdateFutli {
  my ($text_element, $id) = @_;

  ## both node lists are fetched through the shared xpath context
  my @direct_nodes = $xpc->findnodes("./amf:file/amf:url", $text_element);
  ## the indirect futlis may not be used anywhere, but they are kept
  my @indirect_nodes = $xpc->findnodes( "./amf:hasversion/amf:text/amf:file/amf:url", $text_element );

  ## firstly the direct links
  my @direct_urls = map { scalar $_->textContent } @direct_nodes;
  if ($o{'v'}) {
    print "Direct futli ".$_." added\n" foreach @direct_urls;
  }

  ## then the indirect ones
  my @indirect_urls = map { scalar $_->textContent } @indirect_nodes;
  if ($o{'v'}) {
    print "Indirect futli ".$_." added\n" foreach @indirect_urls;
  }

  ## applying to futli base
  $futli_base->{$id} = [ \@direct_urls, \@indirect_urls ];

  return;
}





## Prints the command line summary to the currently selected output handle.
sub usage {
  my @help_lines = (
    "Usage: make_pile [args] impna\n",
    "-h print help\n",
    "-v verbose\n",
    "-d for a date\n",
    "-o overwrite existing output file\n",
    "-x don’t save date_base and futli_base\n",
    "-F don’t recompile the futli_checking_base, for fast testing only\n",
  );
  print join('', @help_lines);
}




## not reached: the main program already calls exit before the sub definitions
exit;
