#!/usr/bin/perl

use strict;
use warnings;

use File::Basename;
use XML::LibXML;

use Krichel::Shoti;

# The file to process is the first command-line argument.
my $file = $ARGV[0];

# Without an argument there is nothing to do.
unless (defined $file) {
  print "I have no file argument.\n";
  exit;
}

# The argument has to name an existing plain file.
unless (-f $file) {
  print "I can't see your file '$file'.\n";
  exit;
}

# Build the output file name. The input base name must contain a "shoti":
# six characters from [0-9a-z] that sit between an underscore and a dot.
# The output file lives in the same directory and carries the same name,
# except that the shoti is replaced by the value of Krichel::Shoti::now()
# (presumably a shoti for the current time -- confirm in Krichel::Shoti).
my $time;
my $bana=basename($file);
my $dina=dirname($file);
if($bana=~m|_([0-9a-z]{6})\.|) {
  $time=$1;
}
else {
  print "I found no shoti on $bana.\n";
  exit;
}
my $now=Krichel::Shoti::now();
my $out_bana=$bana;
# Anchor the replacement on the surrounding underscore and dot, so that the
# occurrence matched above is the one replaced, even when the same six
# characters also appear earlier in the name.
$out_bana=~s|_\Q$time\E\.|_${now}.|;
my $out_file="$dina/$out_bana";

# Parse the input file into an XML::LibXML document.
my $fh;
# Three-argument open: the file name is taken literally, whatever
# characters it contains. Stop right here when the file can't be opened.
open($fh,'<',$file)
  or die "I can't open '$file': $!\n";
binmode $fh; # drop all PerlIO layers possibly created by a use open pragma
my $doc = XML::LibXML->load_xml(IO => $fh);
close $fh;

# Remove duplicate texts. The <text> elements are visited in the order
# getElementsByTagName returns them. The first element with a given 'ref'
# attribute is kept. For every later element with the same 'ref', the
# enclosing <haspart> is removed from its <collection> parent. Duplicates
# that do not sit in a collection/haspart/text structure are reported and
# left alone.
my $root_ele=$doc->documentElement;
my $seen={};  # ref value => 1, once a text with that ref has been kept

my $count_gone=0;  # number of <haspart> elements removed
foreach my $text_ele ($root_ele->getElementsByTagName('text')) {
  my $id=$text_ele->getAttribute('ref');
  # A text without a ref attribute can't be compared with anything. Leave
  # it in place rather than treating all such texts as copies of each other.
  if(not defined($id)) {
    next;
  }
  if(not $seen->{$id}) {
    $seen->{$id}=1;
    next;
  }
  my $haspart_ele=$text_ele->parentNode;
  my $haspart_name=$haspart_ele->nodeName;
  if($haspart_name ne 'haspart') {
    print "I skip the duplicate $id, as it's parent is not a haspart.\n";
    next;
  }
  my $coll_ele=$haspart_ele->parentNode;
  my $coll_name=$coll_ele->nodeName;
  if($coll_name ne 'collection') {
    print "I skip the duplicate '$id'. It's grandparent is a '$coll_name'.\n";
    next;
  }
  $coll_ele->removeChild($haspart_ele);
  $count_gone++;
}

# Write the cleaned document to the output file, but only when something
# was removed. Otherwise say so and leave the file system untouched.
if(not $count_gone) {
  print "I found no duplicates.\n";
  exit;
}

# Only report success when the document was really written.
$doc->toFile($out_file)
  or die "I can't write '$out_file'.\n";
print "I wrote $out_file with $count_gone fewer texts.\n";
