
    73i<                        S SK r S SKJr  S SKrS SKJr  S SKrS SKrS SKrS SKrS SK	r	S SK
r
S SKrS SKrS SKrS SKJr  \R                  " S5      r\R                  " S5      rS rS rS rS	 rS
 rS rS&S jrS rS rS rS rS rS&S jrS rS r S'S jr!  S(S jr"S r#S)S jr$  S*S jr%S+S jr&S r'S,S jr(S,S jr)S r*S r+S  r,S! r-S" r.S# r/S-S$ jr0S% r1g).    N)datetime)copyfilez7It\s+was\s+last\s+changed\s+on\s+\d{4}.\d{2}.\d{2}\.\s*z\s+c                      [         R                  " 5       n [        R                  " 5       R	                  S5      nS[        U 5      -   S-   U-   nU$ )N%sz/tmp/_)osgetpidr   nowstrftimestr)pidtist	warc_paths      !/home/ernad/ernad/python/filer.pytemp_warc_pathr      sA    
))+C<<>""4(D#c("S(4/I    c                 \   [         R                  R                  [         R                  R                  U 5      5      (       d5   [         R                  " [         R                  R                  U 5      5        g g ! [
         a)  nUR                  [        R                  :w  a  e  S nAg S nAff = fN)r   pathexistsdirnamemakedirsOSErrorerrnoEEXIST)filenameexcs     r   preparer      so    77>>"''//(344	KK12 5  	yyELL( )	s   3A8 8
B+B&&B+c                    [         R                  " SS9R                  n[        U [        R
                  5      (       d  [        R                  " U 5      nOU nUR                  USS9  [        U5        [        R                  R                  U5      (       a1  [        R                  " X!SS9(       a  [        R                  " U5        g [        X!5        [        R                  " U5        g )NF)deleteT)pretty_print)shallow)tempfileNamedTemporaryFilename
isinstanceetree_ElementTreeElementTreewriter   r   r   r   filecmpcmpremover   )ingestfufitemp_file_namedocs       r   install_xmlr2   (   s    00>CCNfe0011'IIn4I0DM	ww~~d;;~T:IIn%^"IInr   c                    Sn[        U5         [        U 5      n[        R                  R                  U5      (       d.  [        US5      nUR                  U5        UR                  5         g[        US5       nUR                  5       n[        U5      n[        U5      nS S S 5        WW:X  a  g[        US5      nUR                  U5        UR                  5         g! [         a    U n Nf = f! , (       d  f       NU= f)N w   rr   )r   r   	Exceptionr   r   r   openr*   closereadnormalize_for_change)html
there_fufistring
there_file
old_string
new_strings         r   install_htmlrC   8   s    FJT 77>>*%%*c*
 	j#	*__&
)*5
)&1
 
 Zj#&JV!   
	s   C 8'C.C+*C+.
C<c                 t    [        U S5       nUR                  5       nS S S 5        U$ ! , (       d  f       W$ = fNr7   r9   r;   )r/   filer?   s      r   slurprH   P   s/    	dCD 
M 
Ms   (
7c                 ^    [         R                  SU 5      n[        R                  SU5      nU$ )Nr4   )
re_changedsubre_whitespace)r?   norm_strings     r   r<   r<   V   s*    ..V,K##B4Kr   c                     SnU(       a  Sn[         R                  R                  U 5      n[        R                  " U5      nUR                  U5      nU$ )Nz%Y-%m-%du   %Y‒%m‒%d)r   r   getmtimer   utcfromtimestampr   )r/   prettydate_formattimemdates        r   rT   rT   \   sF    K+77D!D$$T*DMM+&ELr   c                    U SS  S:w  a-  [        U 5       n [        R                  " U5      nS S S 5        U$ [
        R                  " U S5       nUR                  5       nUR                  S5      n [        R                  " U5      nS S S 5        U$ ! [        R                  R                   a    0 n Nf = f! , (       d  f       W$ = f! [        R                  R                   a    0 n Ndf = f! , (       d  f       W$ = f)N.gzr7   utf-8)
r9   jsonloaddecoderJSONDecodeErrorgzipGzipFiler;   decodeloads)r/   the_filedata	json_file
json_bytesjson_strings         r   rZ   rZ   f   s    BCyE$Z8yy*  	tS	!Y^^%
 ''0	::k*D	 
" K <<//  Z  ||++ 	D	 
"	! KsR   B7B"C/1C	 B41B73B44B77
C	 C,)C/+C,,C//
C>c                    [        U [        5      (       a  U nUnOUnU n[        U5        USS  S:w  a,  [        US5       n[        R
                  " X4SS9  S S S 5        U$ [        R                  " USSS9R                  S5      S	-   n[        R                  " US5       nUR                  U5        AS S S 5        U$ ! , (       d  f       U$ = f! , (       d  f       U$ = f)
NrV   rW   r5   r6   )indentF)ensure_asciirg   rX      
)r&   r   r   r9   rY   dumpdumpsencoder]   r^   r*   )a1a2r/   rb   ra   dump_strings         r   rj   rj   z   s    "cDMBCyE$_IIdQ/ **T$%''-vg?K	tS	!X{# 
" K _ 
"	! Ks   B;C;
C

Cc                 Z    [         R                  R                  U 5      n[        U5      nU$ r   )r   r   rO   int)r/   mtimes     r   rr   rr      s$    GGT"EJELr   c                     [         R                  R                  U 5      n[        U5      n[        R
                  " U5      nU$ r   )r   r   rO   rq   shotisermake)r/   rr   mshotis      r   shotirw      s2    GGT"EJE]]5!FMr   c                     [         R                  R                  U 5      n[        R                  " 5       R                  S5      n[        U5      U-
  nU$ )Nr   )r   r   rO   r   r
   r   rq   )r/   rr   r
   ages       r   ry   ry      s?    GGT"E
,,.
!
!$
'C
c(U
CJr   c                    U(       a  [        SU-   S-   U -   5        [        U S5       n[        US5       nUR                  UR                  5       5        S S S 5        S S S 5        g ! , (       d  f       N= f! , (       d  f       g = f)Nz	I append z to abrb)printr9   r*   r;   )canonic_fufisupplem_fufi
do_verbosecanonic_filesupplem_files        r   concatr      sh    kL(61L@A	lD	!\,%|0023 & 
"	!%% 
"	!s"   A7 A&A7&
A4	0A77
Bc                     [         R                  R                  U 5      (       d  [        U S-   5      e[	        U S5      nUR                  5       nUR                  5         U$ )N is not there.br)r   r   isfiler8   r9   r;   r:   r/   ra   r?   s      r   breadr      sL    77>>$//00D$H]]_FNNMr   c                    [         R                  R                  U 5      (       d  [        U S-   5      eU SS  S:w  a'  [	        U S5       nUR                  5       nS S S 5        U$ [        R                  " U S5       nUR                  5       nUR                  5       nS S S 5        WR                  5         W$ ! , (       d  f       W$ = f! , (       d  f       N2= f)Nr   rV   rW   r7   )
r   r   r   r8   r9   r;   r]   r^   r_   r:   r   s      r   sreadr      s    77>>$//00BCyE$_]]_F  M	 ]]4%]]_F]]_F & 	M _ M	 &%s   B5:!C5
C
Cc                    [         R                  R                  U 5      (       a&  [        U 5      nXA:X  a  U(       a  [	        SU -   5        g[        U S5      nUR                  U5        UR                  5         U(       a  [	        SU -   5        g)NzNo change in FbwzI write T)r   r   r   r   r}   r9   r*   r:   )r/   r?   with_backupr   rA   ra   s         r   briter      sl    	ww~~d4[
o,-D$HNN6NNj4 r   c                 
   USL a   [         R                  R                  U 5      nO/[        U[        5      (       a  UnO[        U[
        5      (       a  UnU(       aJ  [         R                  R                  U 5      (       a&  [        U 5      nXq:X  a  U(       a  [        SU -   5        g[        U S5      nU SS  S:w  a'  [        U S5       nUR                  U5        S S S 5        O?[        R                  " U S5       nUR                  UR                  5       5        S S S 5        UR                  5         U(       a  [        SU -   5        Ub  [         R                  " U WU45        g! , (       d  f       NP= f! , (       d  f       Na= f)	NTzfiler: I keep Fr5   rV   rW   wbzfiler: I wrote )r   r   rO   r&   floatrq   r   r   r}   r9   r*   r]   rl   r:   utime)	r/   r?   	do_backupr   do_change_checkdo_preserve_timerS   rA   ra   s	            r   sriter      s%   D ww%	$e	,	,	$c	*	*277>>$//4[
&-.D#HBCyE$_NN6" _ YYtT"hNN6==?+ #NN$&' #
d|$ _ #"s   E#9 E4#
E14
Fc                 b    [         R                  R                  U 5      nUSS nUS   S:w  a  g U$ )Nr   
      -)r   r   basename)r/   bana	issuedates      r   get_issuedater     s6    77D!DQr
I|sr   c                     [         R                  R                  U 5      (       d  U(       a  g[        SU -   5      e[         R                  " U 5      nUR
                  nXB::  a  gg)NTzfiler needs F)r   r   r   r8   statst_size)r/   allow_missingmin_sizeinfosizes        r   is_emptyr     sM    77>>$NT122774=D<<Dr   c                    [         R                  R                  U 5      (       d  U(       a  [        SU -   5        g[         R                  " U 5      nUR
                  nUS:X  a  U(       d  U(       a  [        U  S35        gUR                  n[        U[        5      (       d  [        S5      eU H  nUc  M  [         R                  R                  U5      (       d$  U(       d  [        SU-   [        R                  S9  MP  [         R                  " U5      n	U	R                  n
X:  d  My  U(       a  [        U  SU 35          g   U(       a  [        S	U -   5        g
)zdoes need renewalzfiler.donore does not see Tr   z	 is emptyz%filer.donere needs a list of in_fufisz donere does not see the in_fufi rG   z older than zfiler.donere skips F)r   r   r   r}   r   r   st_mtimer&   listr8   sysstderr)out_fufiin_fufisr   do_allow_emptysilent_absentout_infoout_size	out_mtimein_fufiin_infoin_mtimes              r   donerer   $  s    77>>(##.9:wwx HH1}^XJi()!!Ih%%?@@?ww~~g&&}4w>zz#'''"##
,wi89  #h./r   c                 d   [         R                  R                  U 5      nU(       a  [        SU SU  35        US-   U-   nUb
  U SU SU 3n[        R                  " U5       HJ  nX`:X  a  U(       a  [        SU -   5        M  U(       a  [        SU-   5        [         R
                  " U5        ML     g )Nzfiler clears z from z/*/*zfiler.clear_by_extension keeps z!filer.clear_by_extension removes )r   r   r   r}   globr-   )r/   extr   prefixfudiglob_string
found_fufis          r   clear_by_extensionr   b  s    77??4 DcU&/0+#Kaxq.ii,
7$>?5
BC
		* -r   c                 ,   [         R                  R                  U 5      (       a  U $ U R                  S5      (       a-  U S S n[         R                  R                  U5      (       a  U$  g U S-   n[         R                  R                  U5      (       a  U$ g )NrW   rV   )r   r   r   endswith)r/   fufi_without_gzfufi_with_gzs      r   is_therer   s  s~    	ww~~d}}Us)77>>/**"" +  e|77>>,''r   c                 v   U R                  S5      (       a:   [        R                  " U S5       n[        R                  " U5      nUsS S S 5        $ Uc  [        R                  " SS9n [        R                  " X5      nU$ ! , (       d  f       N>= f! [
         a     g [        R                   a,    [        R                  " X S S 5        U S S n [        U 5      s $ f = f! [         a    [        SU  3[        R                  S9   g [        R                   a    [        U S-   [        R                  S9   g f = f)	NrW   r|   rV   T)remove_blank_textzfiling can not open r   z is not well formed)r   r]   r9   r'   parseFileNotFoundErrorBadGzipFileshutilmove	parse_lax	XMLParserr   r}   r   r   XMLSyntaxError)r/   parsergz_filetreer1   s        r   r   r     s   }}U		#4&'{{7+ '& ~48kk$' J) '& ! 	 	#KK3Bi(9DT?"	#  $TF+#**= d**<sL   B B	B )C  
BB B 
C<CC %D8.D87D8c                 b    Uc  [         R                  " 5       n[         R                  " X5      nU$ r   )r'   r   r   )r/   r   r1   s      r   parse_strictr     s*    ~" ++d
#C Jr   c                 x    [        U S5       nUR                  S5      S:H  sS S S 5        $ ! , (       d  f       g = f)Nr|      s   rF   )r/   test_fs     r   is_gzr     s*    	dD	V{{1~, 
		s   +
9c                 n   [         R                  R                  U 5      (       d  gU R                  S5      (       d  g[	        U 5      (       a  g[         R
                  " U 5      n[        U S5       nUR                  5       nS S S 5        WR                  5         [        R                  " U S5       nUR                  WR                  5       5        S S S 5        UR                  5         [         R                  " XR                  UR                  45        g! , (       d  f       N= f! , (       d  f       N\= f)NFrW   r7   r   T)r   r   r   r   r   r   r9   r;   r:   r]   r*   rl   r   st_atimer   )r/   r   ra   r?   s       r   
correct_gzr     s    77>>$==T{{774=D	dCH 
NN	4	(v}}' 
NNHHTMM4==12 
 
	s   0D0 D&
D#&
D4c                 ^    [        U S5      nUR                  5       nUR                  5         U$ rE   )r9   	readlinesr:   )r/   ra   liness      r   read_as_linesr     s*    D#H ENNLr   c                     [        U S5      nUR                  5       n/ nU H  nUR                  US S 5        M     UR                  5         U$ )Nr7   )r9   r   appendr:   )r/   ra   r   outlines        r   
read_linesr     sK    D#H E
C

49 NNJr   c                    U R                  S5      nUR                  S5      nSnU[        [        U5      [        U5      5      :  a4  X$   X4   :w  a  O)US-  nU[        [        U5      [        U5      5      :  a  M4  X$S  nX4S  nS[        U5      S-
  -  SR                  U5      -   n[        R
                  R                  U5      (       a  [        R                  " U5        [        R                  " XQ5        g )Nr   r   r6   z../)	splitminlenjoinr   r   lexistsr-   symlink)	orig_fufi	dest_fufi	orig_dirs	dest_dirscommon_countr   s         r   	link_fufir     s    $I$IL
S^S^<
<"i&== S^S^<
< -(I-(IC	NQ&'#((9*==D	wwy!!
		)JJtr   c                 |    Sn[        U S5       n[        S U 5       5      nS S S 5        U$ ! , (       d  f       U$ = f)Nr   r|   c              3   &   #    U  H  nS v   M	     g7f)r6   N ).0r   s     r   	<genexpr>count_lines.<locals>.<genexpr>  s     .X!Xs   )r9   sum)r/   count_linesra   s      r   r   r     s=    K	dD	X.X.. 
 
	s   ,
;c                     [        U 5        [        R                  " U 5      nSnU H3  nUS-  nU(       a  [        SU 35        [        R                  " U5        M5     U$ )Nr   r6   zfiler removes )r}   r   r   r-   )r   r   fufiscount_filesr/   s        r   clear_by_globr     sV    	+IIk"EKqN4&)*
		$	 
 r   c                    U R                  S5      (       d  [        SU  35        g U S S n[        R                  R	                  U5      (       a  [        SU 35        U $ [
        R                  " U S5       n[        US5       nUR                  UR                  5       5        S S S 5        S S S 5        U$ ! , (       d  f       N= f! , (       d  f       U$ = f)NrW   zfiler does not decompress rV   zfiler: see r|   r   )	r   r}   r   r   r   r]   r9   r*   r;   )r   ou_fufiin_fileou_files       r   make_unzipped_filer  
  s    E""*7)45crlG	ww~~gG9%&	7D	!W'4 GMM',,.) ! 
" N !  
"	! Ns$   6C B5#C5
C	?C
C)F)FF)FFTN)Fr   )FFF)FNr   )T)2r]   r   r   
lxml.etreer'   r   r#   r+   rer   rY   rt   r   r   r   compilerJ   rL   r   r   r2   rC   rH   r<   rT   rZ   rj   rr   rw   ry   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  r   r   r   <module>r	     s	     	     	     
  ZZ @ A


6" 0((4Z 5:15@
 AF |":-
"8  	r   