
    x5g\                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ  G d
 d      Zy)    N)Plumi)Vemli)Recon)Runer)Sidat)Ishus)Spigr)Xpafsc                       e Zd Z	 	 ddZddZ	 	 ddZd Zd ZddZddZ	d	 Z
d
 Zd Zd Zd Zd Zd Zd Zd ZddZy)Trainc                    || _         t        | j                         | _        t        | j                         | _        t        | j                         | _        t        | j                         | _        t        | j                         | _
        t        | j                         | _        t        | j                         | _        t        | j                         | _        | j                  j"                  | _        d| _        d| _        d | _        d| j                   j*                  v r't-        | j                   j*                  d         | _        d | _        d | _        d | _        d | _        d | _        d | _        g | _        || _        d| _        y )NFd   train_limit_by_daysn) er   plumir   vemlir   reconr   runerr   sidatr
   xpafsr   ishusr	   spigrsig	has_errormodel_min_sizetrain_limitconfint
start_date
train_fufi
train_file
model_fufireportrepcodeseen_papids
do_verbose
is_derived)selferimpr'   do_musits       !/home/ernad/ernad/python/train.py__init__zTrain.__init__   s   466]
466]
466]
466]
466]
466]
466]
466]
::>>! DFFKK/"466;;/D#EFD $     c                    | j                   }||j                  vrt        d|z   t        j                         y|j                  |   }|| _        || _        | j                  }|j                  d   dz   |z   dz   | _        |j                  d   dz   |z   dz   | _	        |j                  d   dz   |z   dz   }t        j                  |       y )Nztrain does not see the repcode fileFmocla/.train.model)r   r$   printsysstderrr%   r   dirsr!   r#   filerprepare)r)   r%   grund_boostr   r$   r   r!   s          r,   setupzTrain.setup9   s    FF!(("3g=zz#'"
 hh ++g.4s:XE ++g.4s:XE [[)C/#5@
j!r.   c                    | j                   }| j                  |       | j                  }| j                   j                  |   j                         }t	        |      }|dkD  r|rt        d| d| d       y|j                  |   j                  d   }|j                  j                  |      }	|j                  d   }
|j                  j                  |
d	      }|t        j                  |d
d       |yt        j                  j                  |      | j                  k  r$t        d|        t        j                   |       y|	yt        j"                  ||	g      S )zdo we need to train?r   train: z has z issues waiting, no trainFsentr2   z*.model)searchr5   r'   Tz train: I delete the empty model )r   r=   r$   waitinglenr6   r9   d	very_lastlast_by_mtimer:   clear_by_extensionospathgetsizer   removedonere)r)   r%   skip_on_waitingskip_on_stalledr   r$   waiting_issuesrC   sent_dirlast_sent_file	mocla_dir
last_models               r,   rM   zTrain.donereO   s9    FF

7 w/779n%Q;?GG9E'2KLM 88G$))&1x0KK(	SS&&y&C
!$$ZeL77??:&)<)<<4ZLABIIj!!||J(899r.   c                    | j                  |      | _        | j                  }|rt        d       | j	                  |       | j
                  }| j                  }g | _        t        j                  |       t        j                  j                  |      rt        j                  |      rvt        d|z          t        j                  |       t        j                  j                  | j                        rt        d|z   dz   |z          t        j                  |       n}|rt        d|        | j                  |      r|r.t        d|        nt        d       t!        j"                          | j%                  |      s| j                  rt        d|        y	| j                  rt        d
|        t'        |d      | _        | j*                  j-                  || j                        }t/        |      dk(  rA| j0                  j3                  |       }|| j*                  j5                  |      }|d	u r#| j6                  j8                  d   }t        d| d| dt         j:                         t        j                  j                  | j                        rst        d|        | j(                  j=                          t        d| j
                          t        j                  | j
                         t!        j"                          | j6                  j8                  d   }t        d| dt         j:                         t!        j"                          |jt        dt         j:                         | j(                  j=                          t        j                  | j
                         t!        j"                          n>t        d|        ddlm }	 ddl!}
 |	| jD                        }|jG                  |       |j                  }t        | jD                  j6                  |   j8                  d          | jD                  j6                  |   j8                  d   }t        j                  jI                  |      }|dz   |z   }t        d| d|        |
jK                  ||       | j(                  j=                          t        j                  | j
                         t        d       t!        j"                          | j(                  jM                  |       | j(                  j=                          y d	}d	}|D ]z  }| jN                  QtQ        jR                  |      | jN                  kD  r/d }|st        d!|z          d }| j                  rt        d"|z          `||   | _*        | jW                  |       | d#| jD                  jX                  vr8| j                  r+t        d$       | j[                          | j]                          y |rF| j                  rt        d%| j^                   d&       | j[                          | j]                          y | j6                  ja                         s,t        d'       | j[                          | j]                          y | j(                  j=                          | j                  rt        d(       | j]                          t'        | j
                  d)      | _        | j0                  j3                  |       }| j(                  jM                  |       | j(                  j=                          t        d*       | j]                          y)+z=--> build the train file, only done if there is no train fileztrain: start buildtrain deletes the empty train deletes z
 based on ztrain checks train file ztrain: I checked ztrain ends without a train_fufiztrain: I don't renew Fztrain: start build w)r    r   Nforpitrain sees z but z
 is natal.r0   ztrain: I see ztrain: I remove ztrain: delete z and try againz/train: kaputt: no sigr, no dafus, no mothercodeztrain mother repcode is Moclar2   r3   ztrain copies z to zThe end.Tztrain skips plumis older than ztrain skips plumis from is_seedableztrain: no seedsr?   z is too old for seedsz train: report is not adolecents.ztrain: run lemma svgaztrain runs main svm)1start_trainr    r'   r6   r=   r!   r#   r&   r:   r;   rI   rJ   isfileis_emptyrL   check_train_fufir7   exitrM   openr"   r   dafusrD   r   
get_vemlisget_from_motherr$   r9   r8   closer2   r\   shutilr   last_sigbasenamecopy2writer   daterago
plumi_fufi
feed_issuer   finish_buildrun_svmr%   is_it_adolescent)r)   r%   r'   r!   r#   plumi_dafusvemlis
mothercode	forpi_dirr\   ri   r2   mother_model_fufi
model_fudi
model_banahave_i_printed_about_skipam_i_older_than_train_limit	issuedates                     r,   
build_filezTrain.build_filet   s   **73__
&'

7__
__
j!77>>*%~~j)0:=>		*%77>>$//2*Z7,F$% &IIj)4ZLAB((4! 1*>?;<HHJ{{7+ 5j\BC ??'
|45z3/jj&&w4??&K {q  ZZ**40F~!ZZ77@
&!%!1!1'!:IK	{%y
K"zz+ww~~doo6j\:;--/ 00ABC		$//2
!%!1!1'!:IN9+^D"zz+HHJ%K"zz+OO))+IIdoo.HHJ4ZLAB+!!$&&MENN:.(-(8(8%$&&--055g>?!%w!7!<!<W!EJ!#!1!12C!DJ!+c!1J!>JM*;)<DMNLL!2J?OO))+IIdoo.*%HHJOO!!&)OO!!#$)!&+#$I+yy#d&6&66.2+0:YFG04-??4y@A *)4DOOOI& %  +'(!!#(~-BCDLLN{{++-45LLN 	??()t4&&t,f%#$r.   c                     | j                   rt        d| j                  z          | j                  j	                          y)Nztrain finishes building T)r'   r6   r!   r"   rh   )r)   s    r,   rr   zTrain.finish_build  s.    ??,t>?r.   Nc                 2   || j                   v rn|)t        d|z   dz   t        j                         d| _        y|dk(  rt        d|z   dz          yt        d|z   dz   |z   t        j                         d| _        y| j                   j                  |       y)	NrZ   zagain.r0   TFgrund in the grund.z in )r&   r6   r7   r8   r   append)r)   papidnotes      r,   check_seen_papidszTrain.check_seen_papids	  s    D$$$|me+h6SZZH!%wme+.>>?me+f4t;::'!%&r.   c                    | j                  ||       | j                  }t        j                  j	                  |      rLt        j                  |      r7t        d|z   dz          t        j                  |       | j                  |       t        d       t        j                  | j                  dd       t        j                  j	                  |      sh| j                  rt        d|z          | j                  |       t        j                  j	                  |      st        d	|z   t        j                  
       | j                  rt        d|z          t        j                  | j                        rHt        d| j                  z   t        j                  
       t        j                  | j                         y| j                  }t        j                  ||g      st        d| d| d       y| j                   r/t        d       t        d|z          t        j                  |       y| j#                         }|yt        j                  |dd       y)z;--> run training, build train file only if no training filer<   rV   .ztrain: clear by extentionr4   TrB   ztrain builds ztrain could not build the r0   ztrain uses ztrain has no data in Ntrain doneres  over Fzmocla: end because of errorzmocla: I delete r5   )r=   r!   rI   rJ   r`   r:   ra   r6   rL   r   rH   r'   r7   r8   r#   rM   r   rs   )r)   r%   r<   r!   r#   outs         r,   runz	Train.run  s   

7
4__
77>>*%%..*D,z9C?@IIj!OOG$)*  (tLww~~j)o
23OOG$77>>*-2Z?::'??-*,->>$//*)DOO;zz#IIdoo&__
||J5N:,fZLBC>>/0$z12IIj!lln;  X$Gr.   c                    d}d| j                   j                  v r1|| j                   j                  d   dz   z  }|| j                  dz   z  }|| j                  z  }d| j                  z   dz   | j
                  z   }| j                  rt        |dz   |z          | j                  j                  ||d      }|t        d| j                  z   d	z          y | j                  rt        d
       t        j                  | j                  | j                        rKt        d| j                  z   dz   t        j                         t        j                   | j                         y y)N'/home/ernad/opt/usr/bin/svm-train -b 1 svm_train_flags train__Tdo_skipr?   
 is lockedsvm-train donemin_sizetrain finds that the model  is empty. It deletes it.r0   )r   r   r!   r#   r%   r   r'   r6   r   run_with_lockr:   ra   r   r7   r8   rI   rL   )r)   runnerloggerr   s       r,   rs   zTrain.run_svmB  s$   :+dffkk"34s::Fdoo++F$//!DLL(3.9??&3,'(jj&&vvt&D;)doo-<=??"#>>$//D4G4GH/$//A-.47JJ@IIdoo&r.   c                 2   | j                   '|| j                   k  r| j                  rt        d       y| j                  }| j                  rt        d|z          d| j                  z   dz   }|j
                  d   dz   |z   |z   }t        j                  j                  |      st        d|z   d	z   t        j                  
       t        d| j                  z   dz          t        j                  | j                         t        dt        j                  
       t        j                          t        j                  | j                   d      }d| _        d| j                   v rd| _        |j%                         }t'        j(                  |      | _        d| _        |D ]T  }| j/                  |dd j1                         |       | j3                  |j1                         j5                         |       V | j6                  j9                  | j,                         y)zfeeds issue into train fileNztrain skips {issuedate}ztrain: build_file adds r   z.json.gzr   r3   ztrain does not see z' this should have done by post_process.r0   rW   r   ztrain ends.rr   z/forpi/i    r   )r    r'   r6   r   r   r9   rI   rJ   r`   r7   r8   r!   rL   rc   gziprd   rp   r(   	readlinesr:   loadvstringr   decodefeed_plumi_linerstripr"   rm   )r)   r~   r   end
vemli_fufi
plumi_fileplumi_lineslines           r,   rq   zTrain.feed_issueX  s   ??&9t+F/0FF??+i78DHHnz)VVG_s*Y6<
ww~~j)'*4;<zz# "T__4s:;IIdoo&-cjj1HHJYYt4
'!DO **,J'D""4":#4#4#6Y"G  !5!5!7C   	dkk*r.   c                 x   ddl m}  || j                        | _         | j                   j                  }|Nt	        dt
        j                         t        j                  | j                         t        j                          | j                  j                  j                  |vr=t	        |       t        d| j                  j                  j                  z   dz   |z         t        j                   | j                   j                        }|D ]+  }||vst	        d|z   dz   d	z   t
        j                         - t#        | j                  d
      | _        |D ]_  }|| j&                  v rt	        d|z   dz          #| j)                  |d      s7||vr<| j$                  j+                  d||   z   dz          a y)z'--> add only the papids in grund_papidsr   )Grundz9train can't continue without grund vemlis, try 'grund -l'r0   zsig inconsistency between z and ztrain: the requested z is not in the grund,z try to run grund -lr^   ztrain finds seen paper r   r   r   z-1 
T)r   r   r   r   r6   r7   r8   rI   rL   r!   rc   r   r   r   	Exceptionr:   r   rd   r"   r&   r   rm   )r)   grund_papidsr   grund_vemli_fufigrund_vemlisr   s         r,   
feed_grundzTrain.feed_grundy  s   466]
::00#Mzz#IIdoo&HHJ::'77"#8"jj..2235<=./ 0 0 zz$**"7"78!EL(-58OO./58ZZA "
 t4!E((( /%7:JJK))%g)>L( OO!!%,u*="="DE "  r.   c                    | j                   }|dd }|dd  }||vr"t        |dz   |z   t        j                         y | xj                  |dz   ||   z   dz   z  c_        | xj                  |dz   | j
                  z   dz   z  c_        y )Nr      r   z is not in the vemli of r0   r   r   )r   r6   r7   r8   r   r(   )r)   r   r~   r   indicr   s         r,   r   zTrain.feed_plumi_line  s    FFQq	QR >%44y@zz# 	us{QuX-33y384??r.   c                    | j                   j                          t        d       | j                  | j                  d       ddlm}  || j                        | _        t        d       | j
                  j                  | j                  d d| j                  	      }t        d
|z          ddl	m
}  || j                        | _	        | j                  j                  |      }d}t        | j                  j                  d         |z
  }g }|D ]  }	||k  r|dz  }|j                  |	         t        |      |k7  r?t        dt!        t        |            z   dz   t!        |      z   t"        j$                         |S )Nztrain runs for grundTr   r   r[   )r*   ztrain runs grund classF)do_asembr'   rZ   )Asemb)fufi
grund_size   zcount problem r   r0   )r"   rh   r6   r   r%   r2   r\   r   r'   asembr   parse_side_fufir   r   r   rD   strr7   r8   )
r)   	grund_gapr\   	side_fufir   papwescountstartr   r   s
             r,   find_grund_to_addzTrain.find_grund_to_add  sD   $%40(
&'JJNN4<<.2oo # ?	mi'((
+++;DFFKK-.:Eu}
&	 
 |	)"S\):%;;cA	N#zz# r.   c                     | j                   j                  |      }|t        d| t        j                         | j
                  j                  |d      }|y |S )Nz!train can not read the recon for r0   z//e:start_train/text())r   docr6   r7   r8   r   none_or_one)r)   r%   r   founds       r,   r_   zTrain.start_train  sV    

w'=5gY?zz#

&&u.FG=r.   c                 ~   t        j                  |      }d}d}|D ]P  }|j                  d      r|dz  }|j                  d      r|dz  }1t        d| dt        j
                          y |dk(  rt        d	| d
t        j
                         | j                  rt	        j                          n t        d       t        j                  |       | j                  st        j                  |       y|dk(  rCt        d	| dt        j
                         t        d|        t        j                  |       yy)Nr   z-1r   z+1ztrain: bad train line ''r0   Fztrain: bad z without positive.ztrain removes {fufi}z without negative.ztrain removes T)
r:   
read_lines
startswithr6   r7   r8   r'   rc   rI   rL   )r)   r   lines
count_plus
count_minur   s         r,   rb   zTrain.check_train_fufi  s     &

Dt$a
t$a
+D63zz#  ?Kv%78zz#
,-		$??		$?Kv%78zz#
 N4&)*IIdO r.   c                 <   |j                  d      st        dt        j                         |dd }|dz   }t	        j
                  ||g      st        d| d| d	       y
t        j                  j                  |      }d}d| j                  j                  v r'|| j                  j                  d   dz   z  }||dz   z  }||z  }d|z   }| j                  rt        |dz   |z          | j                  j                  ||d      }|t        d|z   dz          y| j                  rt        d       t	        j                  || j                        rKt        d| j                   z   dz   t        j                         t        j"                  | j                          yy)z- used by mulsi only but could be more generalr4   z"train: bad train fufi {train_fufi}r0   Nir5   r   r   r   Fr   r   r   r   Tr   r?   r   r   r   r   r   )endswithr6   r7   r8   r:   rM   rI   rJ   rk   r   r   r'   r   r   ra   r   r#   rL   )r)   r!   train_body_finar#   banar   r   r   s           r,   run_train_by_fufizTrain.run_train_by_fufi   sy   ""8,6zz# %Sb/$x/
||J5N:,fZLBCww0:+dffkk"34s::Fj3&&F*D??&3,'(jj&&vvt&D;)j(<78??"#>>*t/B/BC/$//A-.47JJ@IIdoo&	 Dr.   c                     | j                   j                  |   }|j                  d   dz   }t        j                  |      }t	        |      dkD  r#t        d| t        j                         |d   S |d   S )Nr2   z/*.trainr   z$train: more than one train file for r0   r   r   )r   r$   r9   globrD   r6   r7   r8   )r)   r%   r$   
train_globfufiss        r,   	last_fufizTrain.last_fufi"  sl    w'[[)J6
		*%u:>8	Bzz#9Qxr.   c                    	 | j                   }d| j                   j                  vrt        dt        j                         yt        |j                  d         }| j                  j                  |      }t        |      }|d   }|d   }| j                  j                         }	|	|   }
|
|k  ry|	|   }||z   }||k  ry|j                  |   S )z7does training a report require issues older than traspitraspiztrain: no traspir0   Nr   r   )r   r   r6   r7   r8   r   r   re   listr   build_counter
issuedates)r)   r%   r'   r   r   plumi_fufisplumi_datesfirst_plumi
last_plumicounterfirst_plumi_counterlast_plumi_countlimit_earliest_plumi_used_counts                r,   overhangzTrain.overhang,  s    *FF 466;;&$zz#QVVH%&jj&&w/
 ;'!!n _
****,%k2&(":. +;V*C'*V3||;<<r.   )FF)F)FT)N)__name__
__module____qualname__r-   r=   rM   r   rr   r   r   rs   rq   r   r   r   r_   rb   r   r   r    r.   r,   r   r      sp    ).@", /4##:JM^$%HN,+B(T@ 8	$LD=r.   r   )r   r   rI   r7   r:   rn   r   r   r   r   r   r   r   r   r   r   r   r   r   r	   r   r
   r   r   r.   r,   <module>r      s=      	 

          s= s=r.   