o
    b@                     @   sr   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ G dd dZdS )    N)Plumi)Vemli)Runer)Sidatc                   @   sv   e Zd ZdddZdddZdd Zdd	d
Zdd Zdd ZdddZ	dddZ
dd Zdd Zdd Zdd ZdS )TrainFc                 C   st   || _ t| j | _t| j | _t| j | _t| j | _| jj	| _	d| _
d | _d| j jv r5t| j jd | _|| _d S )NFtrain_limit_by_days)er   plumir   vemlir   runerr   sidatsig	has_errortrain_limitconfint
do_verbose)selferimpr    r   !/home/ernad/ernad/python/train.py__init__   s   
zTrain.__init__c                 C   s   | j }||jvrtd| tjd dS |j| }|| _|| _|s#i | _| j}|jd d | d | _	|jd d | d | _
|jd d | d | _|jd d | d }t| d S )	Nztrain does not see the repcode fileFmocla/.train.modelz.sutra.json.gz)r   reportprintsysstderrrepcodesutrar   dirs
train_fufi
model_fufi
sutra_fufifilerprepare)r   r"   grund_boostr   r   r   r%   r   r   r   setup    s"   

zTrain.setupc                 C   s   | j }| | | j}|j| jd }|j|}|du rdS |jd }|jj|dd}|du r2dS tj	|dk r<dS t
||gS )	z4no renew if *any* model is newer than last sent filesentNFr   z*.model)searchTd   )r   r+   r   r$   d	very_lastlast_by_mtimeospathgetsizer(   donere)r   r"   r   r   sent_dirlast_sent_file	mocla_dir
last_modelr   r   r   r5   5   s   

zTrain.donereNc                 C   s  | j }|r	td | | | j}| j}i | _g | _t| t	j
|rNt|rLtd|  t	| t	j
| jrKtd| d |  t	| ndS t|d| _| j|}d}d| _|D ]T}|| _i | j|< d| j| d< d| j| d	< | jd
urt|| jkrd| _d
| j| d< d
| j| d	< |std|  d}| j rtd|  qa|| | _| | qad| jjvs| jrtd| j d |   dS i | jd< d| jd d< d| jd d	< | j|}	| |	 | j |}
t	j
|
r5| j!"|
}|d
u rtd|
  n0t#|}|D ]!}| j$|dd | j%d||  d  | jd d  d7  < qntd|
  |d
urE| &| t'| jjd t(| j }|dkrltdt)| d  | *|}| &| n| jj+| j,d d }t--|D ]}t	| q||   dS )z=--> build the train file, only done if there is no train filezmocla: start buildtrain deletes the empty train deletes z
 based on Fwr   +1-1NTztrain skips plumis older than ztrain skips plumis from is_seedabletrain: z is too old for seeds
0000-00-00z%train does not have sidat vemlis, no seednote+1 
   z(train does not see external seed krikts 
grund_sizezI add z from grund.r   z/grund*).r   r   r+   r%   r&   r#   seen_papidsr(   r)   r2   r3   isfileis_emptyremoveopen
train_filer	   fufisam_i_older_than_train_limit	issuedater   daterago
plumi_fufi
feed_issuer   r   r"   finish_buildr   
sidis_fufi	add_sidis
krikt_fufir
   
build_fufiloadcheck_seen_papidswrite
feed_grundr   lenstrfind_grund_to_addr   r$   glob)r   r"   grund_papidsr   r%   r&   plumi_dafushave_i_printed_about_skiprQ   rW   krikt_seed_fufiseed_vemli_fufi
vemli_datapapid	grund_gap
grund_globfufir   r   r   
build_fileH   s   
















zTrain.build_filec                 C   sf   t | jdkrtdtjd dS | jrtd| j  t| j| j | jr,td| j	  | j
  dS )Nr   z#train does not write an empty sutrar   Fztrain writes ztrain finishes building T)r_   r#   r   r    r!   r   r'   r(   dumpr%   rN   close)r   r   r   r   rV      s   
zTrain.finish_buildc           	      C   s   t j|sddl}|jd| j dd t|}| j}|D ]_}| j	|| |}|| }|du rat
d | j| j | j	|| |}|du rad| d| }|d	| f7 }t
|tjd
 q| j||d | jd| d  | jd d  d7  < qdS )zThis adds the interla seedr   Nzsidat T)shellz+train got a None sidis vemli, try to updatez
train for z no z
for sidis r   rC   rE   rF   rA   r=   rG   )r2   r3   rJ   
subprocessrunr"   r(   r[   r
   grepr   r   update_reportr    r!   r\   rN   r]   r#   )	r   rW   rq   papisur"   ri   r
   rQ   errorr   r   r   rX      s*   
zTrain.add_sidisc                 C   s   || j v r9|d u rtd| d tjd d| _dS |dkr'td| d  dS td| d | tjd d| _dS | j | dS )	Ntrain sees zagain.r   TFgrund in the grund.z in )rI   r   r    r!   r   append)r   ri   rD   r   r   r   r\      s   
zTrain.check_seen_papidsc                 C   s&  | j ||d | j}tj|r't|r'td| d  t| | 	| t
| jd tj|sQ| jr=td|  | 	| tj|sQtd| tjd | jrZtd|  t| jrrtd	| j tjd t| j d
S | j}t||gstd| d| d dS | jrtd td|  t| t  d}d| jjv r|| jjd d 7 }||d 7 }||7 }d| d | j }| jrt|d |  | jj||dd}|d
u rtd| d  d
S | jrtd tj| jddr	td| j d tjd t| j d
S tj
|ddd d
S ) z;--> run training, build train file only if no training filer*   r:   .r   ztrain builds ztrain could not build the r   ztrain uses ztrain has no data in Nztrain doneres z over Fzmocla: end because of errorzmocla: I delete z/usr/bin/svm-train -b 1 svm_train_flags train__T)do_skipr@   z
 is lockedzsvm-train doneX   )min_sizeztrain finds that the model z is empty. It deletes it.r   )r   )r+   r%   r2   r3   rJ   r(   rK   r   rL   rm   clear_by_extensionr   r    r!   r&   r5   r   quitr   r   r   r   run_with_lock)r   r"   r*   r%   r&   runnerloggeroutr   r   r   rr      sn   





z	Train.runc                 C   s   | j }| jrtd|  d| j d }|jd d | | }tj|sHtd| d tj	d td	| j
 d
  t| j
 tdtj	d t  t| jd}| }t|| _d| _|D ]}| j|dd  |d | |   q^| j| j dS )zfeeds issue into train fileztrain: build_file adds r   z.json.gzr
   r   ztrain does not see z' this should have done by post_process.r   r;   r|   ztrain ends.r    rC   N)r   r   r   r   r$   r2   r3   rJ   r    r!   r%   rL   r   gziprM   rT   	readlinesr(   r[   vstringr\   decodefeed_plumi_linerstriprN   r]   )r   rQ   r   end
vemli_fufi
plumi_fileplumi_linesliner   r   r   rU   )  s.   zTrain.feed_issuec                 C   s*  ddl m} || j| _ | j j}|du r$tdtjd t| j	 t
  | jjj|vr8td| jjj d | t| j j}|D ]}||vrTtd| d	 d
 tjd qAt| j	d| _|D ]4}|| jv rntd| d  q^| j|ddsvq^||vr{q^| jd||  d  | jd d  d7  < q^dS )z'--> add only the papids in grund_papidsr   )GrundNz9train can't continue without grund vemlis, try 'grund -l'r   zsig inconsistency between z and ztrain: the requested z is not in the grund,z try to run grund -laztrain finds seen paper ry   rx   rC   z-1 rF   rA   r>   rG   T)rx   r   r   r   r   r    r!   r2   rL   r%   r   r   r
   r   	Exceptionr(   r[   rM   rN   rI   r\   r]   r#   )r   rc   r   grund_vemli_fufigrund_vemlisri   r   r   r   r^   A  sL   
zTrain.feed_grundc                 C   s|   | j }|dd }|dd  }||vr!t|d | j tjd d S | j| j |  d7  < |  j|d ||  d 7  _d S )	Nr      r   z is not in the vemli of r   rG   r~   rF   )r   r   rQ   r    r!   r#   r   )r   r   r   indicri   r   r   r   r   j  s   "zTrain.feed_plumi_linec           
      C   s  | j   td | j| jdd ddlm} || jd| _td | jj| jd d| jd	}td
|  ddl	m
} || jd| _	| j	j|d}d}t| jjd | }g }|D ]}	||k rc|d7 }qX||	 qXt||krtdtt| d t| tjd |S )Nztrain runs for grundTr{   r   )Mocla)r   ztrain runs grund classF)do_asembr   rw   )Asemb)rl   rH   rG   zcount problem r~   r   )rN   ro   r   rr   r"   r   r   r   r   asembr   parse_side_fufir   r   rz   r_   r`   r    r!   )
r   rj   r   	side_fufir   papwescountstartrc   ri   r   r   r   ra   x  s8   
zTrain.find_grund_to_add)F)N)__name__
__module____qualname__r   r+   r5   rm   rV   rX   r\   rr   rU   r^   r   ra   r   r   r   r   r      s    


s

6)r   )rb   r   r2   r    r(   rR   r	   r   r
   r   r   r   r   r   r   r   r   r   r   <module>   s   