
    Zf@                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ  G d d          ZdS )    N)Plumi)Vemli)Runer)Sidatc                   `    e Zd ZddZddZd ZddZd Zd Zdd	Z	dd
Z
d Zd Zd Zd ZdS )TrainFc                    || _         t          | j                   | _        t          | j                   | _        t          | j                   | _        t          | j                   | _        | j        j	        | _	        d| _
        d | _        d| j         j        v r$t          | j         j        d                   | _        || _        d S )NFtrain_limit_by_days)er   plumir   vemlir   runerr   sidatsig	has_errortrain_limitconfint
do_verbose)selferimpr   s      !/home/ernad/ernad/python/train.py__init__zTrain.__init__   s    46]]
46]]
46]]
46]]
:> DFK//"46;/D#EFFD$t    c                    | j         }||j        vr t          d|z   t          j                   dS |j        |         }|| _        || _        |si | _        | j        }|j        d         dz   |z   dz   | _	        |j        d         dz   |z   dz   | _
        |j        d         dz   |z   dz   | _        |j        d         dz   |z   dz   }t          j        |           d S )	Nztrain does not see the repcode fileFmocla/.train.modelz.sutra.json.gz)r   reportprintsysstderrrepcodesutrar   dirs
train_fufi
model_fufi
sutra_fufifilerprepare)r   r&   grund_boostr   r"   r   r)   s          r   setupzTrain.setup    s    F!(""3g=z# # # #5'"  	DJh +g.4s:XE +g.4s:XE +g.4s:=MM[)C/#5@
j!!!!!r   c                    | j         }|                     |           | j        }|j        |         j        d         }|j                            |          }|dS |j        d         }|j                            |d          }|dS t          j        	                    |          dk     rdS t          j        ||g          S )	z4no renew if *any* model is newer than last sent filesentNFr   z*.model)searchTd   )r   r/   r"   r(   d	very_lastlast_by_mtimeospathgetsizer,   donere)r   r&   r   r"   sent_dirlast_sent_file	mocla_dir
last_models           r   r:   zTrain.donere5   s    F

78G$)&1x00!5K(	S&&y&CC
47??:&&,,4|J(8999r   Nc                 8	   | j         }|rt          d           |                     |           | j        }| j        }i | _        g | _        t          j        |           t          j
                            |          rt          j        |          rwt          d|z              t          j        |           t          j
                            | j                  r,t          d|z   dz   |z              t          j        |           ndS t          |d          | _        | j                            |          }d}d| _        |D ]}|| _        i | j        |<   d| j        |         d<   d| j        |         d	<   | j        tt+          j        |          | j        k    rWd| _        d
| j        |         d<   d
| j        |         d	<   |st          d|z              d}| j         rt          d|z              ||         | _        |                     |           d| j        j        vs| j        r.t          d| j         d           |                                  dS i | j        d<   d| j        d         d<   d| j        d         d	<   | j                            |          }	|                     |	           | j                             |          }
t          j
                            |
          r| j!        "                    |
          }|t          d|
z              nt          j#        |          }|D ]Z}| $                    |d           | j        %                    d||         z   dz              | j        d         dxx         dz  cc<   [nt          d|
z              || &                    |           tO          | j        j        d                   tQ          | j                  z
  }|dk    rMt          dtS          |          z   dz              | *                    |          }| &                    |           nK| j        j+        |         j,        d         dz   }t[          j-        |          D ]}t          j        |           |                                  dS )z=--> build the train file, only done if there is no train filezmocla: start buildtrain deletes the empty train deletes z
 based on Fwr   +1-1NTztrain skips plumis older than ztrain skips plumis from is_seedabletrain: z is too old for seeds
0000-00-00z%train does not have sidat vemlis, no seednote+1 
   z(train does not see external seed krikts 
grund_sizezI add z from grund.r   z/grund*).r   r#   r/   r)   r*   r'   seen_papidsr,   r-   r7   r8   isfileis_emptyremoveopen
train_filer   fufisam_i_older_than_train_limit	issuedater   daterago
plumi_fufi
feed_issuer   r   r&   finish_buildr   
sidis_fufi	add_sidis
krikt_fufir   
build_fufiloadcheck_seen_papidswrite
feed_grundr   lenstrfind_grund_to_addr"   r(   glob)r   r&   grund_papidsr   r)   r*   plumi_dafushave_i_printed_about_skiprW   r]   krikt_seed_fufiseed_vemli_fufi
vemli_datapapid	grund_gap
grund_globfufis                    r   
build_filezTrain.build_fileH   s   _
 	(&'''

7_
_

j!!!7>>*%% 	~j)) 0:=>>>	*%%%7>>$/22 **Z7,F$% & & &Ij))) uz3//j&&w//$)!+0($ 	' 	'I&DN$&DJy!*+DJy!$'*+DJy!$'+y##d&666370.2
9%d+.2
9%d+0 5:YFGGG04-? B4y@AAA))4DOOOI&&&&+++ ,?DL???@@@4#%
< )*
< &)*
< & Z**733
z"""
 *//887>>/** 	P"j33ODDO&=OPPPP #Z88
' 8 8E**5v*>>>O))%*U2C*Cd*JKKKJ|,T222a722228
 <NOOO
 #OOL)))
 L122S9I5J5JJ	q==(S^^+n<===11)<<LOOL)))) w/4W=	IJ	*--    	$tr   c                 X   t          | j                  dk    rt          dt          j                   dS | j        rt          d| j        z              t          j        | j        | j                   | j        rt          d| j	        z              | j
                                         dS )Nr   z#train does not write an empty sutrar   Fztrain writes ztrain finishes building T)re   r'   r#   r$   r%   r   r+   r,   dumpr)   rT   close)r   s    r   r\   zTrain.finish_build   s    tz??a7cjIIII5? 	5/DO3444
4:t/// ? 	@,t>???tr   c                    t           j                            |          s#ddl}|                    d| j        z   d           t          j        |          }| j        }|D ]}| j        	                    ||         |          }||         }|~t          d           | j                            | j                   | j        	                    ||         |          }|-d| d| }|d	| fz  }t          |t          j        
           |                     ||           | j                            d|z   dz              | j        d         dxx         dz  cc<   dS )zThis adds the interla seedr   Nzsidat T)shellz+train got a None sidis vemli, try to updatez
train for z no z
for sidis r   rI   rK   rL   rG   rC   rM   )r7   r8   rP   
subprocessrunr&   r,   ra   r   grepr#   r   update_reportr$   r%   rb   rT   rc   r'   )	r   r]   ry   papisur&   ro   r   rW   errors	            r   r^   zTrain.add_sidis   sy   w~~j)) 	@NN8dl2$N???J'', 	0 	0EJOOF5M599EuI}CDDD
((666
uu======e==E5)5566E%cj1111""5y"999O!!%%-$"6777J|$T***a/****tr   c                 8   || j         v rv|*t          d|z   dz   t          j                   d| _        dS |dk    rt          d|z   dz              dS t          d|z   dz   |z   t          j                   d| _        dS | j                             |           dS )	Ntrain sees zagain.r   TFgrund in the grund.z in )rO   r#   r$   r%   r   append)r   ro   rJ   s      r   rb   zTrain.check_seen_papids   s    D$$$|me+h6SZHHHH!%uwme+.>>???ume+f4t;:' ' ' '!%u&&&tr   c                    |                      ||           | j        }t          j                            |          rRt          j        |          r>t          d|z   dz              t          j        |           | 	                    |           t          j
        | j        d           t          j                            |          sk| j        rt          d|z              | 	                    |           t          j                            |          st          d|z   t          j                   | j        rt          d|z              t          j        | j                  r>t          d	| j        z   t          j                   t          j        | j                   d
S | j        }t          j        ||g          st          d| d| d           dS | j        rCt          d           t          d|z              t          j        |           t#                       d}d| j        j        v r || j        j        d         dz   z  }||dz   z  }||z  }d|z   dz   | j        z   }| j        rt          |dz   |z              | j                            ||d          }|t          d|z   dz              d
S | j        rt          d           t          j        | j        d          rAt          d| j        z   dz   t          j                   t          j        | j                   d
S t          j
        |dd           d
S ) z;--> run training, build train file only if no training filer.   r@   .r    ztrain builds ztrain could not build the r   ztrain uses ztrain has no data in Nztrain doneres z over Fzmocla: end because of errorzmocla: I delete z/usr/bin/svm-train -b 1 svm_train_flags train__T)do_skiprF   z
 is lockedzsvm-train doneX   )min_sizeztrain finds that the model z is empty. It deletes it.r!   )r   )r/   r)   r7   r8   rP   r,   rQ   r#   rR   rs   clear_by_extensionr   r$   r%   r*   r:   r   quitr   r   r   r   run_with_lock)r   r&   r.   r)   r*   runnerloggerouts           r   rz   z	Train.run   sa   

7
444_
7>>*%% 	%%.*D*D 	%,z9C?@@@Ij!!!OOG$$$ (;;;w~~j)) 	' 4o
2333OOG$$$7>>*-- '2Z?:' ' ' '? 	.-*,--->$/** 	)DO;z# # # #Ido&&&4_
|J55 	B:BBZBBBCCC5> 	/000$z1222Ij!!!FFF+++dfk"34s::Fj3&&F*G#c)DH4? 	)&3,'(((j&&vvt&DD;)j(<78884? 	$"###>$/B777 	/$/A-.47J@ @ @ @Ido&&&4 X$GGGGGGr   c                 |   | j         }| j        rt          d|z              d| j        z   dz   }|j        d         dz   |z   |z   }t
          j                            |          s}t          d|z   dz   t          j	                   t          d	| j
        z   d
z              t          j        | j
                   t          dt          j	                   t                       t          j        | j        d          }|                                }t#          j        |          | _        d| _        |D ]l}|                     |dd                                         |           |                     |                                                                           m| j                            | j                   dS )zfeeds issue into train fileztrain: build_file adds r   z.json.gzr   r   ztrain does not see z' this should have done by post_process.r   rA   r   ztrain ends.r    rI   N)r   r   r#   r   r(   r7   r8   rP   r$   r%   r)   rR   r   gziprS   rZ   	readlinesr,   ra   vstringrb   decodefeed_plumi_linerstriprT   rc   )r   rW   r   end
vemli_fufi
plumi_fileplumi_lineslines           r   r[   zTrain.feed_issue)  s   F? 	9+i7888DHnz)VG_s*Y6<
w~~j)) 	'*4;<z# # # # "T_4s:;;;Ido&&&-cj1111FFFYt44
 **,,J'' 	9 	9D""4":#4#4#6#6Y"GGG  !5!5!7!78888dk*****r   c                    ddl m}  || j                  | _         | j         j        }|Bt	          dt
          j                   t          j        | j	                   t                       | j        j        j        |vr't          d| j        j        j        z   dz   |z             t          j        | j         j                  }|D ]*}||vr$t	          d|z   d	z   d
z   t
          j                   +t#          | j	        d          | _        |D ]}|| j        v rt	          d|z   dz              !|                     |d          s9||vr>| j                            d||         z   dz              | j        d         dxx         dz  cc<   dS )z'--> add only the papids in grund_papidsr   )GrundNz9train can't continue without grund vemlis, try 'grund -l'r   zsig inconsistency between z and ztrain: the requested z is not in the grund,z try to run grund -laztrain finds seen paper r   r   rI   z-1 rL   rG   rD   rM   T)r   r   r   r   r#   r$   r%   r7   rR   r)   r   r   r   r   	Exceptionr,   ra   rS   rT   rO   rb   rc   r'   )r   ri   r   grund_vemli_fufigrund_vemlisro   s         r   rd   zTrain.feed_grundA  s   U46]]
:0#Mz# # # #Ido&&&FFF:'7778"j.235<=./ 0 0 0 z$*"788! 	A 	AEL((-58OO./58ZA A A A t44! 	0 	0E((( /%7:JJKKK))%g)>> L(( O!!%,u*="="DEEEJ|$T***a/****tr   c                    | j         }|dd         }|dd          }||vr(t          |dz   | j        z   t          j                   d S | j        | j                 |xx         dz  cc<   | xj        |dz   ||         z   dz   z  c_        d S )	Nr      r   z is not in the vemli of r   rM   r   rL   )r   r#   rW   r$   r%   r'   r   )r   r   r   indicro   s        r   r   zTrain.feed_plumi_linej  s    FQqS	QRR >>%44t~Ez# # # #F
4>"5)))Q.)))us{QuX-44r   c                    | j                                          t          d           |                     | j        d           ddlm}  || j                  | _        t          d           | j                            | j        d d| j        	          }t          d
|z              ddl	m
}  || j                  | _	        | j	                            |          }d}t          | j        j        d                   |z
  }g }|D ]#}	||k     r|dz  }|                    |	           $t          |          |k    rKt          dt!          t          |                    z   dz   t!          |          z   t"          j                   |S )Nztrain runs for grundTr   r   )Mocla)r   ztrain runs grund classF)do_asembr   r   )Asemb)rr   rN   rM   zcount problem r   r   )rT   rv   r#   rz   r&   r   r   r   r   asembr   parse_side_fufir   r   r   re   rf   r$   r%   )
r   rp   r   	side_fufir   papwescountstartri   ro   s
             r   rg   zTrain.find_grund_to_addx  s   $%%%4000U(((
&'''JNN4<.2o # ? ?	mi'(((U(((
+++;;DFK-..: 	' 	'Eu}}
&&&&|	))"S\):):%;%;;cA	NN#z# # # # r   )F)N)__name__
__module____qualname__r   r/   r:   rs   r\   r^   rb   rz   r[   rd   r   rg    r   r   r   r      s           " " " "*: : :&q q q qf    0   $3H 3H 3H 3Hl+ + +0' ' 'R5 5 5    r   r   )rh   r   r7   r$   r,   rX   r   r   r   r   r   r   r   r   r   r   r   r   <module>r      s      				 



                          B B B B B B B B B Br   