a
    ak                     @   sN   d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ G dd dZ	dS )    N)Welfoc                   @   s\   e Zd Zdd Zdd ZdddZdd	d
ZdddZdddZdd Z	dd Z
dd ZdS )Dexisc                 C   s"   || _ i | _d | _d | _d | _d S )N)edata	issuedatedoc_file
issuefufis)selferimp r   !/home/ernad/ernad/python/dexis.py__init__   s    zDexis.__init__c                 C   s6   | j }|j|jd }|| _|D ]}| | q"dS )z-> update for all issue filesissuesN)r   ddates_uniquedirsr   update_issue)r	   r   r   r   r   r   r   update_all_issues   s
    zDexis.update_all_issuesNc                 C   s@  || j krdS || _ | j}|dur,t|}| jjd d | d }| jdu rf|j|jd }|| _|dur|td|  ntd || jvr|du rtd	| d
t	j
d dS |du r| j| }n
t|}tj|dd}|| _|| jvr<tj|s |du rtd| d t	j
d dS td|  t|| j|< dS )z3run this before working on a file, loads dexis dataTNdexis/.json.gzr   zdexis gets doc_fufi zdexis gets no doc_fufizdexis sees no issue fufi for .filerbmodez
dexis: no z, run dexis.zdexis loads )r   r   filerget_issuedater   r   r   r   printsysstderrgzipopenr   r   ospathisfileload)r	   r   doc_fufir   Z
dexis_fufir   r   r   r   r   set_issuedate"   sB    




zDexis.set_issuedateTc                 C   s4   | j }t| j|  }t|}| j||d}|S )N)with_ns)r   listr   keysrandomchoicefetch)r	   r*   r   Zpapidspapidoctetsr   r   r   r-   J   s
    
zDexis.randomFc                 C   s   | j }| j| }||vr6td| d | tjd dS || }|d }|d }| j| | j|}|sn|S | jj	d 
d}	d	| jj	d
 
d d }
|dd dkrd|
 d |	 d |dd  }|S tddS )z-get the doc papid, assumed it is in issuedatezI don't see z in the dexis for r   Nslamfzutf-8s   xmlns:xsi="Zxsis   " r   
   s
   <text ref=s   <text s   xmlns="s   " ref="   zI have a namespace issue)r   r   r   r    r!   r   seekreadr   nsencode	Exception)r	   r0   r*   r   Z
dexis_dataZ
papid_datastartlengthr1   amf_nsZxsi_nsr   r   r   r/   Q   s0    

zDexis.fetchc                 C   s   |du r| j | }| jjd d | d }tj|rRtj|tj|krRdS tj|dd}t	| j| _
|j}tj|dd| _tjj | _d| _| j| j_| j| j_| j| j_i | _|| _| j| t| j| | jS )z/updates an issue by issuedate, with mtime checkNr   r   r   Fr   r   )r   r   r   r$   r%   r&   getmtimer"   r#   r   welfoname
check_filexmlparsersexpatParserCreatedocendstart_elementStartElementHandlerend_elementEndElementHandlerend_doc_checkCharacterDataHandlerr   xml_file	ParseFiler   dump)r	   r   r(   out_fufirN   Zxml_fufir   r   r   r   m   s(    



zDexis.update_issuec                 C   sF   |  | |dkrdS d|vr(d| _dS d| _|d | _| jj| _dS )z"expat handler for start of elementtextNrefTF)rL   skip_next_textrS   rE   CurrentByteIndexr<   )r	   rA   attrsr   r   r   rH      s    

zDexis.start_elementc                 C   s0   |  | |dkrdS | jr&d| _dS d| _dS )z expat handler for end of elementrR   NFT)rL   rT   rG   r	   rA   r   r   r   rJ      s    
zDexis.end_elementc                 C   sr   | j s
dS d| _ | jj| _| j| j | _| j| j| j| j i | j	| j
< | j| j	| j
 d< | j| j	| j
 d< dS )z?expat handler to check if we have reached the end of a documentNFr2   r3   )rG   rE   rU   endr<   r=   r@   checkrB   r   rS   rW   r   r   r   rL      s    
zDexis.end_doc_check)N)T)F)N)__name__
__module____qualname__r   r   r)   r-   r/   r   rH   rJ   rL   r   r   r   r   r      s   
(



r   )
r$   r"   r    r-   xml.parsers.expatrC   r   r@   r   r   r   r   r   r   <module>   s   