o
    ak                     @   sN   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ G dd dZ	dS )    N)Welfoc                   @   s\   e Zd Zdd Zdd ZdddZdd	d
ZdddZdddZdd Z	dd Z
dd ZdS )Dexisc                 C   s"   || _ i | _d | _d | _d | _d S N)edata	issuedatedoc_file
issuefufis)selferimp r   !/home/ernad/ernad/python/dexis.py__init__   s   zDexis.__init__c                 C   s6   | j }|j|jd }|| _|D ]}| | qdS )z-> update for all issue filesissuesN)r   ddates_uniquedirsr	   update_issue)r
   r   r	   r   r   r   r   update_all_issues   s   zDexis.update_all_issuesNc                 C   s:  || j krdS || _ | j}|durt|}| jjd d | d }| jdu r3|j|jd }|| _|dur>td|  ntd || jvrX|du rXtd	| d
t	j
d dS |du rb| j| }nt|}tj|dd}|| _|| jvrtj|s|du rtd| d t	j
d dS td|  t|| j|< dS )z3run this before working on a file, loads dexis dataTNdexis/.json.gzr   zdexis gets doc_fufi zdexis gets no doc_fufizdexis sees no issue fufi for .filerbmodez
dexis: no z, run dexis.zdexis loads )r   r   filerget_issuedater   r	   r   r   printsysstderrgzipopenr   r   ospathisfileload)r
   r   doc_fufir   Z
dexis_fufir	   r   r   r   r   set_issuedate"   sB   




zDexis.set_issuedateTc                 C   s4   | j }t| j|  }t|}| j||d}|S )N)with_ns)r   listr   keysrandomchoicefetch)r
   r+   r   Zpapidspapidoctetsr   r   r   r.   J   s
   
zDexis.randomFc                 C   s   | j }| j| }||vrtd| d | tjd dS || }|d }|d }| j| | j|}|s7|S | jj	d 
d}	d	| jj	d
 
d d }
|dd dkrgd|
 d |	 d |dd  }|S td)z-get the doc papid, assumed it is in issuedatezI don't see z in the dexis for r   Nslamfzutf-8s   xmlns:xsi="Zxsis   " r   
   s
   <text ref=s   <text s   xmlns="s   " ref="   zI have a namespace issue)r   r   r    r!   r"   r   seekreadr   nsencode	Exception)r
   r1   r+   r   Z
dexis_dataZ
papid_datastartlengthr2   amf_nsZxsi_nsr   r   r   r0   Q   s0   

zDexis.fetchc                 C   s   |du r	| j | }| jjd d | d }tj|r)tj|tj|kr)dS tj|dd}t	| j| _
|j}tj|dd| _tjj | _d| _| j| j_| j| j_| j| j_i | _|| _| j| t| j| | jS )z/updates an issue by issuedate, with mtime checkNr   r   r   Fr   r   )r	   r   r   r%   r&   r'   getmtimer#   r$   r   welfoname
check_filexmlparsersexpatParserCreatedocendstart_elementStartElementHandlerend_elementEndElementHandlerend_doc_checkCharacterDataHandlerr   xml_file	ParseFiler   dump)r
   r   r)   out_fufirO   Zxml_fufir   r   r   r   m   s(   



zDexis.update_issuec                 C   sF   |  | |dkrdS d|vrd| _dS d| _|d | _| jj| _dS )z"expat handler for start of elementtextNrefTF)rM   skip_next_textrT   rF   CurrentByteIndexr=   )r
   rB   attrsr   r   r   rI      s   

zDexis.start_elementc                 C   s0   |  | |dkrdS | jrd| _dS d| _dS )z expat handler for end of elementrS   NFT)rM   rU   rH   r
   rB   r   r   r   rK      s   

zDexis.end_elementc                 C   sr   | j sdS d| _ | jj| _| j| j | _| j| j| j| j i | j	| j
< | j| j	| j
 d< | j| j	| j
 d< dS )z?expat handler to check if we have reached the end of a documentNFr3   r4   )rH   rF   rV   endr=   r>   rA   checkrC   r   rT   rX   r   r   r   rM      s   
zDexis.end_doc_checkr   )T)F)__name__
__module____qualname__r   r   r*   r.   r0   r   rI   rK   rM   r   r   r   r   r      s    

(


r   )
r%   r#   r!   r.   xml.parsers.expatrD   r   rA   r   r   r   r   r   r   <module>   s   