
    sf                     T    d dl Z d dlZd dlZd dlZd dlZd dlZd dlmZ  G d d      Z	y)    N)Welfoc                   J    e Zd Zd Zd ZddZddZddZddZd Z	d	 Z
d
 Zy)Dexisc                 J    || _         i | _        d | _        d | _        d | _        y N)edata	issuedatedoc_file
issuefufis)selferimps     !/home/ernad/ernad/python/dexis.py__init__zDexis.__init__   s*    	
     c                     | j                   }|j                  j                  |j                  d         }|| _        |D ]  }| j                  |        y)z-> update for all issue filesissuesN)r   ddates_uniquedirsr   update_issue)r   r   r   r
   s       r   update_all_issueszDexis.update_all_issues   sG    FFSS%%affX&67
$#Ii( $r   Nc                 V   || j                   k(  ry|| _         | j                  }|t        j                  |      }| j                  j                  d   dz   |z   dz   }| j
                  /|j                  j                  |j                  d         }|| _        |t        d|z          nt        d       || j
                  vr"| t        d	| d
t        j                         y|| j
                  |   }nt        j                  |      }t        j                  |d      }|| _        || j                  vrst        j                   j#                  |      s$|!t        d|z   dz   t        j                         yt        d|z          t        j$                  |      | j                  |<   y)z3run this before working on a file, loads dexis dataTNdexis/.json.gzr   zdexis gets doc_fufi zdexis gets no doc_fufizdexis sees no issue fufi for .filerbmodez
dexis: no z, run dexis.zdexis loads )r
   r   filerget_issuedater   r   r   r   printsysstderrgzipopenr   r	   ospathisfileload)r   r
   doc_fufir   
dexis_fufir   r   s          r   set_issuedatezDexis.set_issuedate&   sv   &"FF ++H5IVV[[)C/);jH
??"))!&&*:;J(DO(834*+DOO+0@1)A>zz#y1H++H5I99XD1 DII%77>>*-#,3nD"zz+.:-.#(::j#9DIIi r   c                     | j                   }t        | j                  |   j                               }t	        j
                  |      }| j                  ||      }|S )N)with_ns)r
   listr	   keysrandomchoicefetch)r   r2   r
   papidspapidoctetss         r   r5   zDexis.randomN   sL    NN	dii	*//12f%E73r   c                    | j                   }| j                  |   }||vr%t        d|z   dz   |z   t        j                         y||   }|d   }|d   }| j
                  j                  |       | j
                  j                  |      }|s|S | j                  j                  d   j                  d      }	d	| j                  j                  d
   j                  d      z   dz   }
|dd dk(  rd|
z   dz   |	z   dz   |dd z   }|S t        d      )z-get the doc papid, assumed it is in issuedatezI don't see z in the dexis for r   Nslamfzutf-8s   xmlns:xsi="xsis   " r   
   s
   <text ref=s   <text s   xmlns="s   " ref="   zI have a namespace issue)r
   r	   r%   r&   r'   r   seekreadr   nsencode	Exception)r   r9   r2   r
   
dexis_data
papid_datastartlengthr:   amf_nsxsi_nss              r   r7   zDexis.fetchU   s   NN	
 YYy)

".5(+??)Kzz#&
3C5!##F+M5!((1$&&))E"2"9"9'"BBUJ!B<=('*4v=#BC[)FM677r   c                    || j                   |   }| j                  j                  d   dz   |z   dz   }t        j                  j                  |      r@t        j                  j                  |      t        j                  j                  |      kD  ryt        j                  |d      }t        | j                        | _
        |j                  }t        j                  |d      | _        t        j                  j                  j!                         | _        d| _        | j$                  | j                  _        | j(                  | j                  _        | j,                  | j                  _        i | _        || _        | j                  j5                  |       t7        j8                  | j0                  |       | j0                  S )z/updates an issue by issuedate, with mtime checkr   r   r   Fr    r!   )r   r   r   r*   r+   r,   getmtimer(   r)   r   welfoname
check_filexmlparsersexpatParserCreatedocendstart_elementStartElementHandlerend_elementEndElementHandlerend_doc_checkCharacterDataHandlerr	   xml_file	ParseFiler#   dump)r   r
   r.   out_fufir]   xml_fufis         r   r   zDexis.update_issueq   s=   y1H66;;w'#-	9JF77>>(#ww)BGG,<,<X,FF99XD1466]
==))H48[[&&335
)-););

&'+'7'7

$*.*<*<

'	 

X&

499h'yyr   c                     | j                  |       |dk7  ryd|vrd| _        yd| _        |d   | _        | j                  j                  | _        y)z"expat handler for start of elementtextNrefTF)r[   skip_next_textrd   rT   CurrentByteIndexrI   )r   rP   attrss      r   rW   zDexis.start_element   sQ    4 6>"&D#<ZZ00
r   c                 h    | j                  |       |dk7  ry| j                  rd| _        yd| _        y)z expat handler for end of elementrc   NFT)r[   re   rV   r   rP   s     r   rY   zDexis.end_element   s5    4 6>"'Dr   c                    | j                   syd| _         | j                  j                  | _        | j                  | j                  z
  | _        | j                  j                  | j                  | j                  | j
                         i | j                  | j                  <   | j                  | j                  | j                     d<   | j
                  | j                  | j                     d<   y)z?expat handler to check if we have reached the end of a documentNFr<   r=   )rV   rT   rf   endrI   rJ   rO   checkrQ   r	   rd   ri   s     r   r[   zDexis.end_doc_check   s    {{::..hh+

$**dkkB 		$((#'::		$((C #';;		$((C r   r   )T)F)__name__
__module____qualname__r   r   r0   r5   r7   r   rW   rY   r[    r   r   r   r      s0     )&P8841/r   r   )
r*   r(   r&   r5   xml.parsers.expatrR   r#   rO   r   r   rp   r   r   <module>rr      s&    	  
    a/ a/r   