ó
H-!^c           @   sˆ   d  d l  Z  d  d l Z d  d l Z d  d l m Z d  d l m Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 d d
 „  ƒ  YZ d S(   iÿÿÿÿN(   t   datetime(   t   transliteratec         C   sO   t  j d |  ƒ } | r9 | j d ƒ } | j d ƒ } n t } t } | | f S(   Ns   ;([^{]*)\{([^}]*)\}i   i   (   t   ret   searcht   groupt   False(   t   metalinet   mt   tagt   value(    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   extract_tag   s    c         C   sŽ   |  j  d ƒ } i  } xr | D]j } | j d ƒ s† | d k r@ q | j d ƒ s\ d GH| GHq t | ƒ \ } } | r | | | <q d GHq W| S(   s6   Extract metadata from the text file and return a dict.s   
s	   ;METADATAt    t   ;s/   Check line. Does not follow metadata structure.(   t   splitt
   startswithR
   (   t   metatextt   linest   metadatat   lineR   R	   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   prepare_metadata   s    	c         C   sa   g  } |  j  d ƒ } x< | D]4 } | j d ƒ sP | j d ƒ rC q | j | ƒ q Wd j | ƒ S(   s%   Remove page markers and line markers.s   
s   ;p{s   ;l{(   R   R   t   appendt   join(   t   contentt   resultR   R   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   remove_page_line3   s    c         C   s5   t  j d d d ƒ  } t j | ƒ } Wd  QX| |  S(   Ns   dictcode.jsont   rs   utf-8(   t   codecst   opent   jsont   load(   t   codet   fint   dictdata(    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   code_to_dictE   s    c           C   s
   t  j ƒ  S(   N(   R    t   now(    (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt	   timestampJ   s    t	   VerseInfoc           B   s_   e  Z d  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z d „  Z	 d „  Z
 d	 „  Z RS(
   s;   Hold the information regarding current verse being handled.c         C   sU   d |  _  d |  _ d |  _ d |  _ d |  _ d |  _ d |  _ d |  _ d |  _ d S(   s   Initialize with default values.R   i   i    N(	   t   kandat   vargat   subvargat   kandaNumt   vargaNumt   subvargaNumt   pageNumt   verseNumt   lastVerseNum(   t   self(    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   __init__Q   s    								c         C   s   | |  _  d S(   s   Upadate pageNum.N(   R+   (   R.   R+   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   update_pageNum]   s    c         C   s   | |  _  |  j d 7_ d S(   s(   Update subvarga. Also identify its name.i   N(   R'   R*   (   R.   R'   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   update_subvargaa   s    	c         C   s.   d |  _  d |  _ | |  _ |  j d 7_ d S(   s%   Update varga. Reset subvargaNum to 1.R   i   N(   R'   R*   R&   R)   (   R.   R&   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   update_vargaf   s    			c         C   s@   d |  _  d |  _ d |  _ d |  _ | |  _ |  j d 7_ d S(   s2   Update kanda. Reset vargaNum and subvargaNum to 1.R   i   N(   R'   R*   R&   R)   R%   R(   (   R.   R%   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   update_kandam   s    					c         C   s_   t  j d | ƒ } | rK t t | j d ƒ d d ƒ ƒ |  _ |  j |  _ n |  j d |  _ d S(   s9   Identify the verse number from verse and update verseNum.s?   [à¥¥|..] ([0123456789à¥¦à¥§à¥¨à¥©à¥ªà¥«à¥¬à¥­à¥®à¥¯]+) [à¥¥|..]i   t
   devanagarit   slp1N(   R   R   t   intR   R   R-   R,   (   R.   t   verseR   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   update_verseNumv   s
    $c         C   s.   |  j  d |  j d |  j d t |  j ƒ S(   s;   Return names of kanda, varga, subvarga and number of verse.t   .(   R%   R&   R'   t   strR,   (   R.   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   give_verse_details€   s    c         C   s   |  j  S(   s   Return pageNum.(   R+   (   R.   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   give_page_details„   s    c         C   s@   t  |  j ƒ d t  |  j ƒ d t  |  j ƒ d t  |  j ƒ S(   s3   Return numbers of kanda, varga, subvarga and verse.R9   (   R:   R(   R)   R*   R,   (   R.   (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   give_verse_num_detailsˆ   s    (   t   __name__t
   __module__t   __doc__R/   R0   R1   R2   R3   R8   R;   R<   R=   (    (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyR$   N   s   							
		(    (   R   R   R   R    t   indic_transliteration.sanscriptR   R
   R   R   R!   R#   R$   (    (    (    sP   /media/Expansion Drive/c_drive/xampp/htdocs/sanskrit-lexica-ocr/scripts/utils.pyt   <module>   s   					