
    6e<.                     t   d dl Z d dlmZ d dlZd dlmZ d dlZd dlZd dlZd dlZd dl	Z	d dl
Z
d dlZd dlZd dlmZ  ej        d          Z ej        d          Zd Zd Zd Zd	 Zd
 Zd Zd#dZd Zd Zd Zd Zd Zd#dZd Zd Z d$dZ!	 	 d%dZ"d Z#d&dZ$d$dZ%d#dZ&d'dZ'd Z(d(dZ)d(d Z*d! Z+d" Z,dS ))    N)datetime)copyfilez7It\s+was\s+last\s+changed\s+on\s+\d{4}.\d{2}.\d{2}\.\s*z\s+c                      t          j                    } t          j                                        d          }dt          |           z   dz   |z   }|S )N%sz/tmp/_)osgetpidr   nowstrftimestr)pidtist	warc_paths      !/home/ernad/ernad/python/filer.pytemp_warc_pathr      sF    
)++C<>>""4((D#c(("S(4/I    c                 B   t           j                            t           j                            |                     sb	 t          j        t           j                            |                      d S # t
          $ r!}|j        t          j        k    r Y d }~d S d }~ww xY wd S N)r   pathexistsdirnamemakedirsOSErrorerrnoEEXIST)filenameexcs     r   preparer      s    7>>"'//(3344 	K1122222 	 	 	yEL(( )(((((	 s   1A1 1
B;BBc                    t          j        d          j        }t          j        |           }|                    |d           t          |           t          j        	                    |          r-t          j        ||d          rt          j        |           d S t          ||           t          j        |           d S )NF)deleteT)pretty_print)shallow)tempfileNamedTemporaryFilenameetreeElementTreewriter   r   r   r   filecmpcmpremover   )elefufitemp_file_nameets       r   install_xmlr0   '   s    0>>>CN		3		BHH^$H///DMMM	w~~d ;~tT::: 	In%%%F^T"""Inr   c                 X   d}t          |           	 t          |           }n# t          $ r | }Y nw xY wt          j                            |          s;t          |d          }|                    |           |                                 dS t          |d          5 }|	                                }t          |          }t          |          }d d d            n# 1 swxY w Y   ||k    rdS t          |d          }|                    |           |                                 dS )N w   rr   )r   r   	Exceptionr   r   r   openr(   closereadnormalize_for_change)html
there_fufistring
there_file
old_string
new_strings         r   install_htmlrA   4   sr   FJT   7>>*%% *c**
   q	j#		 2*__&&
)*55
)&11
2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 Zqj#&&JV1s   # 223CC"%C"c                 ~    t          | d          5 }|                                }d d d            n# 1 swxY w Y   |S Nr5   r7   r9   )r-   filer=   s      r   slurprF   L   st    	dC D              Ms   266c                 r    t                               d|           }t                              d|          }|S )Nr2   )
re_changedsubre_whitespace)r=   norm_strings     r   r:   r:   R   s0    ..V,,K##B44Kr   Fc                     d}|rd}t           j                            |           }t          j        |          }|                    |          }|S )Nz%Y-%m-%du   %Y‒%m‒%d)r   r   getmtimer   utcfromtimestampr   )r-   prettydate_formattimemdates        r   rR   rR   X   sO    K ,+7D!!D$T**DMM+&&ELr   c                 `   | dd          dk    ryt          |           5 }	 t          j        |          }n:# t          j        j        $ r# t          d|  t          j                   i }Y nw xY wd d d            n# 1 swxY w Y   |S t          j	        | d          5 }|
                                }|                    d          }	 t          j        |          }n:# t          j        j        $ r# t          d|  t          j                   i }Y nw xY wd d d            n# 1 swxY w Y   |S )N.gzzfiler can't decode rE   r5   utf-8)r7   jsonloaddecoderJSONDecodeErrorprintsysstderrgzipGzipFiler9   decodeloads)r-   the_filedata	json_file
json_bytesjson_strings         r   rY   rY   b   s   BCCyE$ZZ 	8y**</   2D22DDDD	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	tS	!	! Y^^%%
 ''00	:k**DD|+ 	 	 	...SZ@@@@DDD	               Ksg   A;5A;4A,)A;+A,,A;;A?A?*D#CD#4DD#DD##D'*D'c                    t          | t                    r| }|}n|}| }t          |           |dd          dk    rAt          |d          5 }t	          j        ||d           d d d            n# 1 swxY w Y   |S t	          j        |dd                              d          }t          j	        |d          5 }|
                    |           ~d d d            n# 1 swxY w Y   |S )	NrT   rU   r3   r4   )indentF)ensure_asciiri   rW   )
isinstancer   r   r7   rX   dumpdumpsencoder_   r`   r(   )a1a2r-   rd   rc   dump_strings         r   rl   rl   v   sh   "c DMMMBCCyE$__ 	0IdHQ////	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0 	0*Ta@@@GGPPK	tS	!	! X{###               Ks$   A00A47A4<CC#&C#c                 b    t           j                            |           }t          |          }|S r   )r   r   rM   int)r-   mtimes     r   rt   rt      s'    GT""EJJELr   c                     t           j                            |           }t          |          }t	          j        |          }|S r   )r   r   rM   rs   shotisermake)r-   rt   mshotis      r   shotiry      s6    GT""EJJE]5!!FMr   c                     t           j                            |           }t          j                                        d          }t          |          |z
  }|S )Nr   )r   r   rM   r   r
   r   rs   )r-   rt   r
   ages       r   r{   r{      sE    GT""E
,..
!
!$
'
'C
c((U
CJr   c                 *   |rt          d|z   dz   | z              t          | d          5 }t          |d          5 }|                    |                                           d d d            n# 1 swxY w Y   d d d            d S # 1 swxY w Y   d S )Nz	I append z to abrb)r\   r7   r(   r9   )canonic_fufisupplem_fufi
do_verbosecanonic_filesupplem_files        r   concatr      s    BkL(61L@AAA	lD	!	! 4\,%% 	4|0022333	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	4 	44 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4s4   B(A0$B0A4	4B7A4	8BBBc                     t           j                            |           st          | dz             t	          | d          }|                                }|                                 |S )N is not there.br)r   r   isfiler6   r7   r9   r8   r-   rc   r=   s      r   breadr      s]    7>>$ 1//000D$H]]__FNNMr   c                    t           j                            |           st          | dz             | dd          dk    r=t	          | d          5 }|                                }d d d            n# 1 swxY w Y   nit          j        | d          5 }|                                }|                                }d d d            n# 1 swxY w Y   |	                                 |S )Nr   rT   rU   r5   )
r   r   r   r6   r7   r9   r_   r`   ra   r8   r   s      r   sreadr      sD   7>>$ 1//000BCCyE$__ 	%]]__F	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% ]4%% 	%]]__F]]__F	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	% 	Ms$   A11A58A5)CCCc                 4   t           j                            |           r+t          |           }||k    r|rt	          d| z              dS t          | d          }|                    |           |                                 |rt	          d| z              dS )NzNo change in FbwzI write T)r   r   r   r   r\   r7   r(   r8   )r-   r=   with_backupr   r?   rc   s         r   briter      s    	w~~d 4[[
 .o,---5D$HNN6NN !j4   4r   Tc                    |du r t           j                            |           }n/t          |t                    r|}nt          |t
                    r|}|rJt           j                            |           r+t          |           }||k    r|rt          d| z              dS t          | d          }| dd          dk    r>t          | d          5 }|
                    |           d d d            n# 1 swxY w Y   nTt          j	        | d          5 }|
                    |                                           d d d            n# 1 swxY w Y   |                                 |rt          d| z              t           j                            |           st          |  d	          |t          j        | ||f           dS )
NTzfiler: I keep Fr3   rT   rU   wbzfiler: I wrote z should have been written.)r   r   rM   rk   floatrs   r   r   r\   r7   r(   r_   rn   r8   r6   utime)	r-   r=   	do_backupr   do_change_checkdo_preserve_timerQ   r?   rc   s	            r   sriter      s7   D  w%%	$e	,	,  	$c	*	*   27>>$// 4[[
 /&-...5D#HBCCyE$__ 	#NN6"""	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# 	# YtT"" 	,hNN6==??+++	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	, 	,NN ($&'''7>>$ =4;;;<<<#
d|$$$4s$   C00C47C4(EE	E	c                 t    t           j                            |           }|dd         }|d         dk    rd S |S )Nr   
      -)r   r   basename)r-   bana	issuedates      r   get_issuedater   	  s>    7D!!DQrT
I|str   c                     t           j                            |           s|rdS t          d| z             t          j        |           }|j        }||k    rdS dS )NTzfiler needs F)r   r   r   r6   statst_size)r-   allow_missingmin_sizeinfosizes        r   is_emptyr     sa    7>>$ 3 	34NT122274==D<Dxt5r   c                 Z   t           j                            |           s|rt          d| z              dS t          j        |           }|j        }|dk    r|s|rt          |  d           dS |j        }t          |t                    st          d          |D ]z}t           j                            |          st          d|z   t          j                   @t          j        |          }|j        }	|	|k    r|rt          |  d|             dS {|rt          d	| z              d
S )zdoes need renewalzfiler.donore does not see Tr   z	 is emptyz%filer.donere needs a list of in_fufisz donere does not see the in_fufi rV   z older than zfiler.donere skips F)r   r   r   r\   r   r   st_mtimerk   listr6   r]   r^   )
out_fufiin_fufisr   do_allow_emptyout_infoout_size	out_mtimein_fufiin_infoin_mtimes
             r   donerer     sl   7>>(##  	;.9:::twx  HH1}}^} 	*X((()))t!Ih%% A?@@@ 
 
w~~g&& 	4w>z# # # #''""#i :88w8899944    0#h.///5r   c                 R   t           j                            |           }t           j                            |           }|d         dk    rDt	          |dz   t
          j                   t	          d|d         z   t
          j                   dS |dd         }d	}|                    d
          rd
}n t	          d|z   t
          j                   dS |dz   |z   dz   |z   }t          j        |          D ]1}|| k    r	|rt	          d|z              t          j	        |           2d S )Nr   r   z, does not look like a filename to clear fromrV   zcheck digit is    Fr   r2   z.json.gzz)I can't handle the extension you have on /z_*z	I remove )
r   r   r   r   r\   r]   r^   endswithglobr+   )r-   r   r   fudir   restglob_string
found_fufis           r   clear_by_issuedater   >  sU   7D!!D7??4  DBx3dCC:	 	 	 	$r(*:	 	 	 	uQrT
ID}}Z   9D@:	 	 	 	u*y(4/$6Ki,,  
 	,+
*+++
	* r   c                 N   t           j                            |           }|rt          d| d|             |dz   |z   }|
| d| d| }t	          j        |          D ]E}|| k    r|rt          d| z              |rt          d|z              t          j        |           Fd S )Nzfiler clears z from z/*r   *zfiler.clear_by_extension keeps z!filer.clear_by_extension removes )r   r   r   r\   r   r+   )r-   extr   prefixr   r   r   s          r   clear_by_extensionr   X  s    7??4  D 1/c////000+#K......i,,  
 @7$>??? 	D5
BCCC
	* r   c                    t           j                            |           r| S |                     d          r,| d d         }t           j                            |          r|S n&| dz   }t           j                            |          r|S d S )NrU   rT   )r   r   r   r   )r-   fufi_without_gzfufi_with_gzs      r   is_therer   i  s    	w~~d }}U  ss)7>>/** 	#""	# e|7>>,'' 	 4r   c                    |t          j        d          }	 t          j        | |          }n^# t          $ r" t	          d|  t
          j                   Y d S t           j        $ r" t	          | dz   t
          j                   Y d S w xY w|S )NT)remove_blank_textzfiling can not open rV   z is not well formed)r&   	XMLParserparser   r\   r]   r^   XMLSyntaxErrorr-   parserdocs      r   	parse_laxr   x  s    ~4888k$''   +T++#*====tt   d**<<<<tt Js   / (B
,B
	B
c                 Z    |t          j                    }t          j        | |          }|S r   )r&   r   r   r   s      r   parse_strictr     s.    ~"" +dF
#
#C Jr   c                     t          | d          5 }|                    d          dk    cd d d            S # 1 swxY w Y   d S )Nr~      s   rD   )filepathtest_fs     r   is_gzr     s    	h		 -{{1~~,- - - - - - - - - - - - - - - - - -s   7;;c                 v    t          | d          }|                                }|                                 |S rC   )r7   	readlinesr8   )r-   rc   liness      r   read_as_linesr     s5    D#H  ENNLr   )F)FF)FFTN)Fr   )FNr   )-r_   r   r   
lxml.etreer&   r   r#   r)   rer   rX   rv   r]   shutilr   compilerH   rJ   r   r   r0   rA   rF   r:   rR   rY   rl   rt   ry   r{   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r   r   <module>r      s          				          				    



       RZ @ A A

6""    
 
 
  0         (  &      4 4 4 4       Z 5:15   @  
 
 
 
   >   4   "         - - -
    r   