
    gi                     X    d Z ddl ddl ddlmZmZ  G d de      Z G d de      Z	y)	z
Indian Language POS-Tagged Corpus
Collected by A Kumaran, Microsoft Research, India
Distributed with permission

Contents:
  - Bangla: IIT Kharagpur
  - Hindi: Microsoft Research India
  - Marathi: IIT Bombay
  - Telugu: IIIT Hyderabad
    )*)map_tag	str2tuplec                   0    e Zd ZdZddZddZddZddZy)	IndianCorpusReaderz@
    List of words, one per line.  Blank lines are ignored.
    Nc                     t        | j                  |d      D cg c]  \  }}t        ||dd       c}}      S c c}}w NTFconcatabspathsIndianCorpusViewselffileidsfileidencs       N/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/indian.pywordszIndianCorpusReader.words   sK     &*]]7D%A%AMVS !eU;%A
 	
   ;
c                      r j                   k7  r fd}nd }t         j                  |d      D cg c]  \  }}t        ||dd|       c}}      S c c}}w )Nc                 2    t        j                  |       S Nr   _tagsettr   tagsets    r   <lambda>z1IndianCorpusReader.tagged_words.<locals>.<lambda>)       WT\\61-M    TFr   r   r   r   r   r   r   tag_mapping_functionr   r   s   ` `   r   tagged_wordszIndianCorpusReader.tagged_words'   si    f,#M #'  &*]]7D%A%AMVS !dE;OP%A
 	
   A
c                     t        | j                  |d      D cg c]  \  }}t        ||dd       c}}      S c c}}w r	   r
   r   s       r   sentszIndianCorpusReader.sents3   sK     &*]]7D%A%AMVS !eT:%A
 	
r   c                      r j                   k7  r fd}nd }t         j                  |d      D cg c]  \  }}t        ||dd|       c}}      S c c}}w )Nc                 2    t        j                  |       S r   r   r   s    r   r   z1IndianCorpusReader.tagged_sents.<locals>.<lambda>=   r   r    Tr!   r"   s   ` `   r   tagged_sentszIndianCorpusReader.tagged_sents;   si    f,#M #'  &*]]7D%A%AMVS !dD:NO%A
 	
r%   r   )NN)__name__
__module____qualname____doc__r   r$   r'   r*    r    r   r   r      s    





r    r   c                       e Zd Z	 ddZd Zy)r   Nc                 ^    || _         || _        || _        t        j	                  | ||       y )N)encoding)_tagged_group_by_sent_tag_mapping_functionStreamBackedCorpusView__init__)r   corpus_filer2   taggedgroup_by_sentr#   s         r   r7   zIndianCorpusView.__init__I   s0     +%9"''kH'Mr    c                 ~   |j                         }|j                  d      rg S |j                         D cg c]  }t        |d       }}| j                  r%|D cg c]  \  }}|| j	                  |      f }}}| j
                  s|D cg c]  \  }}|	 }}}| j                  r|gS |S c c}w c c}}w c c}}w )N<_)sep)readline
startswithsplitr   r5   r3   r4   )r   streamlinewordsentwr   s          r   
read_blockzIndianCorpusView.read_blockQ   s     ??3I59ZZ\B\T	$C(\B%%EIJT6AqQ22156TDJ||$()D&1aADD)6MK CJ)s   B.B3B9r   )r+   r,   r-   r7   rG   r/   r    r   r   r   H   s    QUNr    r   N)
r.   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.tagr   r   CorpusReaderr   r6   r   r/   r    r   <module>rL      s/   
 % % '+
 +
\- r    