a
    Qb)                     @   sT  d dl Z d dlmZ d dlZd dlmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZ edZedZdd Zdd	 Zd
d Zdd Zdd Zdd Zd6ddZdd Zdd Zdd Zdd Zdd Zd7dd Zd!d" Zd#d$ Z d8d%d&Z!d9d(d)Z"d*d+ Z#d:d,d-Z$d;d.d/Z%d<d0d1Z&d=d2d3Z'd4d5 Z(dS )>    N)datetime)copyfilez7It\s+was\s+last\s+changed\s+on\s+\d{4}.\d{2}.\d{2}\.\s*z\s+c                  C   s.   t  } t d}dt|  d | }|S )N%sz/tmp/_)osgetpidr   nowstrftimestr)pidtistZ	warc_path r   !/home/ernad/ernad/python/filer.pytemp_warc_path   s    r   c              
   C   sb   t jt j| s^zt t j|  W n2 ty\ } z|jtjkrH W Y d }~n
d }~0 0 d S N)r   pathexistsdirnamemakedirsOSErrorerrnoZEEXIST)filenameexcr   r   r   prepare   s    r   c                 C   sp   t jddj}t| }|j|dd t| tj	|rXt
j||ddrXt| d S t|| t| d S )NF)deleteT)Zpretty_print)Zshallow)tempfileZNamedTemporaryFilenameetreeZElementTreewriter   r   r   r   filecmpZcmpremover   )ZelefufiZtemp_file_nameZetr   r   r   install_xml'   s    


r"   c                 C   s   d}t | zt| }W n ty.   | }Y n0 tj|s\t|d}|| |  dS t|d(}|	 }t
|}t
|}W d    n1 s0    Y  ||krdS t|d}|| |  dS )N w   rr   )r   r
   	Exceptionr   r   r   openr   closereadnormalize_for_change)ZhtmlZ
there_fufistringZ
there_file
old_stringZ
new_stringr   r   r   install_html4   s*    


&

r.   c                 C   s6   t | d}| }W d    n1 s(0    Y  |S )Nr&   )r(   r*   )r!   filer,   r   r   r   slurpL   s    &r0   c                 C   s   t d| }td|}|S )Nr#   )
re_changedsubre_whitespace)r,   Znorm_stringr   r   r   r+   R   s    r+   Fc                 C   s0   d}|rd}t j| }t|}||}|S )Nz%Y-%m-%du   %Y‒%m‒%d)r   r   getmtimer   utcfromtimestampr	   )r!   prettyZdate_formattimemdater   r   r   r8   X   s    

r8   c              	   C   s   | dd  dkrzt | N}zt|}W n. tjjyV   td|  tjd i }Y n0 W d    n1 sl0    Y  |S t	| d`}|
 }|d}zt|}W n. tjjy   td|  tjd i }Y n0 W d    n1 s0    Y  |S )N.gzzfiler can't decode r/   r&   utf-8)r(   jsonloaddecoderZJSONDecodeErrorprintsysstderrgzipGzipFiler*   decodeloads)r!   the_filedataZ	json_fileZ
json_bytesZjson_stringr   r   r   r>   b   s"    
(
(r>   c                 C   s   t | tr| }|}n|}| }|dd  dkrjt|d }tj||dd W d    n1 s\0    Y  |S tj|dddd}t|d}|	| ~W d    n1 s0    Y  |S )	Nr9   r:   r$   r%   )indentF)Zensure_asciirI   r<   )

isinstancer
   r(   r=   dumpdumpsencoderC   rD   r   )Za1Za2r!   rH   rG   Zdump_stringr   r   r   rK   v   s    
.
 rK   c                 C   s   t j| }t|}|S r   )r   r   r4   int)r!   mtimer   r   r   rO      s    rO   c                 C   s"   t j| }t|}t|}|S r   )r   r   r4   rN   shotisermake)r!   rO   Zmshotir   r   r   shoti   s    
rR   c                 C   s*   t j| }t d}t|| }|S )Nr   )r   r   r4   r   r   r	   rN   )r!   rO   r   ager   r   r   rS      s    rS   c              	   C   s~   |rt d| d |   t| dH}t|d}||  W d    n1 sR0    Y  W d    n1 sp0    Y  d S )Nz	I append z to abrb)r@   r(   r   r*   )Zcanonic_fufiZsupplem_fufi
do_verboseZcanonic_fileZsupplem_filer   r   r   concat   s
    rW   c                 C   s6   t j| st| d t| d}| }|  |S )N is not there.br)r   r   isfiler'   r(   r*   r)   r!   rG   r,   r   r   r   bread   s    
r\   c                 C   s   t j| st| d | dd  dkr\t| d}| }W d    q1 sP0    Y  nDt| d }| }| }W d    n1 s0    Y  |	  |S )NrX   r9   r:   r&   )
r   r   rZ   r'   r(   r*   rC   rD   rE   r)   r[   r   r   r   sread   s    (&r]   c                 C   s`   t j| r0t| }||kr0|r,td|   dS t| d}|| |  |r\td|   dS )NzNo change in FZbwzI write T)r   r   rZ   r\   r@   r(   r   r)   )r!   r,   Zwith_backuprV   r-   rG   r   r   r   brite   s    

r^   Tc           	      C   sF  |du rt j| }nt|tr&|}nt|tr4|}|rht j| rht| }||krh|rdtd|   dS t	| d}| dd  dkrt	| d}|
| W d    q1 s0    Y  n:t	| d}|
|  W d    n1 s0    Y  |  |rtd|   t j| s(t|  d	|d urBt | ||f dS )
NTzfiler: I keep Fr$   r9   r:   wbzfiler: I wrote z should have been written.)r   r   r4   rJ   floatrN   rZ   r]   r@   r(   r   rC   rM   r)   r'   utime)	r!   r,   Z	do_backuprV   Zdo_change_checkZdo_preserve_timer7   r-   rG   r   r   r   srite   s4    


*,
rb   c                 C   s,   t j| }|dd }|d dkr(d S |S )Nr   
      -)r   r   basename)r!   bana	issuedater   r   r   get_issuedate  s
    ri   c                 C   s@   t j| s |rdS td|  t | }|j}||kr<dS dS )NTzfiler needs F)r   r   rZ   r'   statst_size)r!   Zallow_missingZmin_sizeinfosizer   r   r   is_empty  s    
rn   c           
      C   s   t j| s |rtd|   dS t | }|j}|dkrR|sR|rNt|  d dS |j}t|tsjt	d|D ]X}t j|std| t
jd qnt |}|j}	|	|krn|rt|  d|   dS qn|rtd	|   d
S )zdoes need renewalzfiler.donore does not see Tr   z	 is emptyz%filer.donere needs a list of in_fufisz donere does not see the in_fufi r;   z older than zfiler.donere skips F)r   r   rZ   r@   rj   rk   st_mtimerJ   listr'   rA   rB   )
Zout_fufiZin_fufisrV   Zdo_allow_emptyZout_infoZout_sizeZ	out_mtimeZin_fufiZin_infoZin_mtimer   r   r   donere  s8    


rq   c                 C   s   t j| }t j| }|d dkrPt|d tjd td|d  tjd dS |dd }d	}|d
rpd
}ntd| tjd dS |d | d | }t|D ](}|| krq|rtd|  t 	| qd S )Nrc   r   z, does not look like a filename to clear fromr;   zcheck digit is    Fr   r#   z.json.gzz)I can't handle the extension you have on /z_*z	I remove )
r   r   rf   r   r@   rA   rB   endswithglobr    )r!   rV   rg   fudirh   restglob_string
found_fufir   r   r   clear_by_issuedate=  s2    
rz   c                 C   s   t j| }|r$td| d|   |d | }|d urL| d| d| }t|D ]8}|| krt|rVtd|   qV|rtd|  t | qVd S )Nzfiler clears z from z/*rs   *zfiler.clear_by_extension keeps z!filer.clear_by_extension removes )r   r   r   r@   ru   r    )r!   extrV   prefixrv   rx   ry   r   r   r   clear_by_extensionW  s    r~   c                 C   sT   t j| r| S | dr8| d d }t j|rP|S n| d }t j|rP|S d S )Nr:   r9   )r   r   rZ   rt   )r!   Zfufi_without_gzZfufi_with_gzr   r   r   is_thereh  s    
r   )F)F)FF)FFTN)Fr   )FF)F)FN))rC   r   r   Z
lxml.etreer   ru   r   r   rer   r=   rP   rA   shutilr   compiler1   r3   r   r   r"   r.   r0   r+   r8   r>   rK   rO   rR   rS   rW   r\   r]   r^   rb   ri   rn   rq   rz   r~   r   r   r   r   r   <module>   sN   

	


	
-  
 	



