
    ak                     \    d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ  G d d          Z	dS )    N)Welfoc                   L    e Zd Zd Zd ZddZddZddZdd	Zd
 Z	d Z
d ZdS )Dexisc                 L    || _         i | _        d | _        d | _        d | _        d S N)edata	issuedatedoc_file
issuefufis)selferimps     !/home/ernad/ernad/python/dexis.py__init__zDexis.__init__   s*    	t    c                     | j         }|j                            |j        d                   }|| _        |D ]}|                     |           dS )z-> update for all issue filesissuesN)r   ddates_uniquedirsr   update_issue)r   r   r   r
   s       r   update_all_issueszDexis.update_all_issues   s\    FS%%afX&677
$# 	) 	)Ii((((	) 	)r   Nc                    || j         k    rdS || _         | j        }|t          j        |          }| j        j        d         dz   |z   dz   }| j        ,|j                            |j        d                   }|| _        |t          d|z              nt          d           || j        vr#|!t          d	| d
t          j
                   dS || j        |         }nt          j        |          }t          j        |d          }|| _        || j        vrrt          j                            |          s%|!t          d|z   dz   t          j
                   dS t          d|z              t          j        |          | j        |<   dS )z3run this before working on a file, loads dexis dataTNdexis/.json.gzr   zdexis gets doc_fufi zdexis gets no doc_fufizdexis sees no issue fufi for .filerbmodez
dexis: no z, run dexis.zdexis loads )r
   r   filerget_issuedater   r   r   r   printsysstderrgzipopenr   r	   ospathisfileload)r   r
   doc_fufir   
dexis_fufir   r   s          r   set_issuedatezDexis.set_issuedate"   s   &&4"F +H55IV[)C/);jH
?"))!&*:;;J(DO(834444*+++DO++0@>)>>>z# # # #4y1HH+H55I9XD111 DI%%7>>*-- #,3nD"z+ + + +t.:-...#(:j#9#9DIi tr   Tc                     | j         }t          | j        |                                                   }t	          j        |          }|                     ||          }|S )N)with_ns)r
   listr	   keysrandomchoicefetch)r   r2   r
   papidspapidoctetss         r   r5   zDexis.randomJ   sR    N	di	*//1122f%%E733r   Fc                 .   | j         }| j        |         }||vr&t          d|z   dz   |z   t          j                   dS ||         }|d         }|d         }| j                            |           | j                            |          }|s|S | j        j	        d         
                    d          }	d	| j        j	        d
         
                    d          z   dz   }
|dd         dk    rd|
z   dz   |	z   dz   |dd         z   }|S t          d          )z-get the doc papid, assumed it is in issuedatezI don't see z in the dexis for r   Nslamfzutf-8s   xmlns:xsi="xsis   " r   
   s
   <text ref=s   <text s   xmlns="s   " ref="   zI have a namespace issue)r
   r	   r%   r&   r'   r   seekreadr   nsencode	Exception)r   r9   r2   r
   
dexis_data
papid_datastartlengthr:   amf_nsxsi_nss              r   r7   zDexis.fetchQ   s>   N	
 Yy)

"".5(+??)Kz# # # #4&
3C5!!!##F++ 	M5!((11$&)E"2"9"9'"B"BBUJ!B$<=(('*4v=#BCC[)FM6777r   c                    || j         |         }| j        j        d         dz   |z   dz   }t          j                            |          rBt          j                            |          t          j                            |          k    rdS t          j        |d          }t          | j                  | _
        |j        }t          j        |d          | _        t          j        j                                        | _        d| _        | j        | j        _        | j        | j        _        | j        | j        _        i | _        || _        | j                            |           t7          j        | j        |           | j        S )z/updates an issue by issuedate, with mtime checkNr   r   r   Fr    r!   )r   r   r   r*   r+   r,   getmtimer(   r)   r   welfoname
check_filexmlparsersexpatParserCreatedocendstart_elementStartElementHandlerend_elementEndElementHandlerend_doc_checkCharacterDataHandlerr	   xml_file	ParseFiler#   dump)r   r
   r.   out_fufir]   xml_fufis         r   r   zDexis.update_issuem   s=   y1H6;w'#-	9JF7>>(## 	w))BG,<,<X,F,FFFu9XD11146]]
=)H4888[&3355
)-);
&'+'7
$*.*<
'	 
X&&&
49h'''yr   c                     |                      |           |dk    rdS d|vr	d| _        dS d| _        |d         | _        | j        j        | _        dS )z"expat handler for start of elementtextNrefTF)r[   skip_next_textrd   rT   CurrentByteIndexrI   )r   rP   attrss      r   rW   zDexis.start_element   s_    4   6>>F"&DF#<Z0


r   c                 n    |                      |           |dk    rdS | j        r	d| _        dS d| _        dS )z expat handler for end of elementrc   NFT)r[   re   rV   r   rP   s     r   rY   zDexis.end_element   sG    4   6>>F 	"'DFr   c                 L   | j         sdS d| _         | j        j        | _        | j        | j        z
  | _        | j                            | j        | j        | j                   i | j	        | j
        <   | j        | j	        | j
                 d<   | j        | j	        | j
                 d<   dS )z?expat handler to check if we have reached the end of a documentNFr<   r=   )rV   rT   rf   endrI   rJ   rO   checkrQ   r	   rd   ri   s     r   r[   zDexis.end_doc_check   s    { 	F:.h+
$*dkBBB 	$(#':	$(C #';	$(C   r   r   )T)F)__name__
__module____qualname__r   r   r0   r5   r7   r   rW   rY   r[    r   r   r   r      s        
 
 
) ) )& & & &P   8 8 8 88   41 1 1  / / / / /r   r   )
r*   r(   r&   r5   xml.parsers.expatrR   r#   rO   r   r   rp   r   r   <module>rr      s    				  



            ]/ ]/ ]/ ]/ ]/ ]/ ]/ ]/ ]/ ]/r   