#!/usr/bin/perl

use strict;
use warnings;

use Carp qw(confess);
use File::Basename;
use File::Slurper;
use List::Util qw(shuffle);

use Data::Dumper;
use Sys::RunAlone;

use Ernad::Erimp;

## first command line argument: the impna handed to Ernad::Erimp, mandatory
my $impna = $ARGV[0] // die "no impna";

## optional second argument: restrict the run to this one report code
my $only_repcode = $ARGV[1] // '';

my $e = Ernad::Erimp->new({
  'impna'   => $impna,
  'verbose' => 1,
});

## dry-run switch: while this is true the subs below only print what they
## would keep or delete, and never call unlink
my $test = 1;

## this needs to go somewhere else
## all directories worked on live below the learn directory
my $learn_dir = $e->{'dir'}->{'learn'};
my $mocla_dir = $learn_dir.'/mocla';
my $dokli_dir = $learn_dir.'/dokli';
my $exfit_dir = $learn_dir.'/exfit';
my $fidek_dir = $learn_dir.'/fidek';
my $fitar_dir = $learn_dir.'/fitar';
my $lisig_dir = $learn_dir.'/lisig';
my $isink_dir = $learn_dir.'/isink';

## build minimum time should be a year ago
## times older than this are only reported as "very small", nothing more
my $now = time;
my $min_time = $now - (365*24*60*60);

## should really be done via erimp
## either the single requested report, or every entry of the mocla
## directory in random order (entries still carry their newline here)
my @reports = $only_repcode
  ? ($only_repcode)
  : shuffle(`ls $mocla_dir`);

## strip the newlines that the directory listing leaves on the report names
chomp @reports;

## times_to_keep are set in this loop: every report adds the time it keeps
my $times_to_keep;
for my $repcode (@reports) {
  $times_to_keep = work_on_mocla($repcode, $times_to_keep);
}
print Dumper($times_to_keep) if $test;

## the declaration has to stay at file level and ahead of the subs:
## work_on_timed_files fills this very variable and returns it
my $issuedates_to_keep = work_on_timed_files($times_to_keep, $dokli_dir);
print Dumper($issuedates_to_keep) if $test;

work_on_dated_files($issuedates_to_keep, $exfit_dir);
for my $timed_dir ($fidek_dir, $fitar_dir, $lisig_dir) {
  work_on_timed_files($times_to_keep, $timed_dir);
}
for my $repcode (@reports) {
  work_on_dated_files($issuedates_to_keep, $isink_dir.'/'.$repcode);
}

## Prune the files below $dir whose embedded time is not to be kept, and
## collect issue dates from the index files that survive.
##
## Arguments:
##   $times_to_keep  hash reference; a file is kept when the time taken
##                   from its name is a true-valued key here
##   $dir            directory that is searched recursively with find
##
## The time of a file is the digit run in its basename matched by
## "_<digits>.<extension>", "_<digits>_" or "<digits>.txt", tried in that
## order. Files without such a time are warned about and left alone.
##
## Every surviving file whose name ends in "index" is read; each of its
## lines has to start with a YYYY-MM-DD date, which is recorded in
## $issuedates_to_keep. That variable is not a parameter: it is the
## file-level lexical of the same name, so the dates found by successive
## calls accumulate in one hash.
##
## Returns $issuedates_to_keep.
## Dies when $times_to_keep has fewer than two keys, when a file name ends
## in "~", or when an index line does not start with a date.
sub work_on_timed_files {
  my $times_to_keep=shift;
  my $dir=shift;
  ## check if we really have times
  if(scalar(keys %{$times_to_keep})<2) {
    die "I don't have enough times_to_keep";
  }
  my @files=`find $dir`;
  foreach my $file (@files) {
    chomp $file;
    if(-d $file) {
      next;
    }
    if($file=~m|~$|) {
      die "bad file $file";
    }
    my $filename=basename($file);
    if(not ($filename=~m|_(\d+)\.[^.]+$|
             or $filename=~m|_(\d+)_|
             or $filename=~m|^(\d+)\.txt$|)) {
      warn "bad file $file";
      next;
    }
    ## $1 comes from whichever of the three patterns matched
    my $time=$1;
    if($time < $min_time) {
      ## only a notice, an old time is still treated like any other
      print "very small time $time\n";
    }
    if($times_to_keep->{$time}) {
      if($test) {
        print "I keep $file\n";
      }
    }
    elsif($test) {
      print "I want to delete $file at $time.\n";
      ## a dry run has to behave as if the file were gone, otherwise the
      ## dates of an index that a real run deletes would be reported as kept
      next;
    }
    else {
      unlink $file or warn "I could not delete $file: $!";
    }
    if(substr($file,-5) ne 'index') {
      next;
    }
    ## an index that has just been deleted has nothing to contribute
    ## unlikely we need the -f check otherwise, unless we have the
    ## April 2016 model deletion situation
    if(not -f $file) {
      next;
    }
    foreach my $line (File::Slurper::read_lines($file)) {
      if($line=~m|^(\d{4}-\d{2}-\d{2})|) {
        $issuedates_to_keep->{$1}=1;
      }
      else {
        die "bad line '$line' in $file";
      }
    }
  }
  return $issuedates_to_keep;
}

## Prune the files below a directory whose issue date is not to be kept.
##
## Arguments:
##   $issuedates_to_keep  hash reference; a file is kept when the first
##                        YYYY-MM-DD date in its basename is a true-valued
##                        key here
##   $dir                 directory that is searched recursively with find;
##                        when it is left out, $exfit_dir is used
##
## Files without a date in their basename are warned about and left alone.
## While $test is true nothing is deleted, the decisions are only printed.
##
## Returns nothing. Dies when a file name ends in "~".
sub work_on_dated_files {
  my $issuedates_to_keep=shift;
  ## honour the directory the caller passes; this used to be hard-wired to
  ## $exfit_dir, so the per-report isink directories were never looked at
  my $dir=shift;
  $dir //= $exfit_dir;
  if(not -d $dir) {
    warn "no directory $dir";
    return;
  }
  my @files=`find $dir`;
  foreach my $file (@files) {
    chomp $file;
    if(-d $file) {
      next;
    }
    if($file=~m|~$|) {
      die "bad file $file";
    }
    my $filename=basename($file);
    if(not $filename=~m|(\d{4}-\d{2}-\d{2})|) {
      warn "bad file $file";
      next;
    }
    my $date=$1;
    if($issuedates_to_keep->{$date}) {
      if($test) {
        print "I keep $file at $date\n";
      }
    }
    elsif($test) {
      print "I delete $file at $date\n";
    }
    else {
      unlink $file or warn "I could not delete $file: $!";
    }
  }
  return;
}

## get the times on mocla
##
## Find the latest time of one report that has a model, prune the report's
## directory down to the files of that time, and register the time.
##
## Arguments:
##   $report         name of the report's directory below $mocla_dir
##   $times_to_keep  hash reference of the times registered so far; may be
##                   undefined on the first call
##
## A time is the digit run matched by "_<digits>.<extension>" at the end of
## a file path. It only counts when a file "*_<time>.model" exists in the
## report's directory; the files of a larger time without a model are
## deleted instead. Afterwards every file whose path does not contain the
## latest time is deleted. While $test is true nothing is deleted, the
## decisions are only printed.
##
## Returns the hash reference with the latest time added as a key. When no
## time with a model is found, the directory is left alone and the hash
## reference is returned unchanged.
sub work_on_mocla {
  my $report=shift;
  my $times_to_keep=shift;
  ## always hand back a hash reference, also when nothing gets registered
  $times_to_keep //= {};
  my $dir=$mocla_dir.'/'.$report;
  my @files=`find $dir`;
  my $latest=0;
  foreach my $file (@files) {
    chomp $file;
    if(-d $file) {
      next;
    }
    ## something that is already gone
    if(not -f $file) {
      next;
    }
    if($file=~m|~$|) {
      warn "bad file $file";
    }
    if(not $file=~m|_(\d+)\.[^.]+$|) {
      warn "bad file $file";
      next;
    }
    my $time=$1;
    if($time < $min_time) {
      ## only a notice, an old time is still treated like any other
      print "very small time $time\n";
    }
    if($time <= $latest) {
      next;
    }
    ## check that we have a model for this time
    ## this almost surely only needed when an idiot has
    ## deleted old models carelessly, like mid-April 2016
    ## note: we need the glob, and the scalar as separate lines here
    my $glob_time_model="$mocla_dir/$report/*_$time.model";
    my @found_time_files=glob($glob_time_model);
    my $count_time_models=scalar(@found_time_files);
    if($count_time_models>0) {
      $latest=$time;
      next;
    }
    ## there is no model with this time, so its other files are useless
    my $glob_time="$mocla_dir/$report/*_$time.*";
    if($test) {
      print "I counted $count_time_models model\n";
      print "I don't have a model for $glob_time_model\n";
      print "I delete the glob $glob_time\n";
    }
    else {
      unlink glob($glob_time);
    }
  }
  ## without any time that has a model there is nothing sensible to keep:
  ## matching the files against 0 would keep or delete them by accident,
  ## and a time of 0 must not end up among the times to keep
  if(not $latest) {
    warn "I found no time with a model in $dir, I leave it alone";
    return $times_to_keep;
  }
  ## keep only the files with the latest time; the first loop has made
  ## sure that there is a model among them
  foreach my $file (@files) {
    if(-d $file) {
      next;
    }
    if($file=~m|$latest|) {
      if($test) {
        print "I keep $file.\n";
      }
    }
    elsif($test) {
      print "I want to delete $file.\n";
    }
    else {
      unlink $file;
    }
  }
  $times_to_keep->{$latest}=1;
  return $times_to_keep;
}

__END__;
