
    gf                     f    d Z ddlZddl dddddd	d
Z ee      Z G d d      Z G d de      Zy)a)  
Corpus reader for the Information Extraction and Entity Recognition Corpus.

NIST 1999 Information Extraction: Entity Recognition Evaluation
https://www.itl.nist.gov/iad/894.01/tests/ie-er/er_99/er_99.htm

This corpus contains the NEWSWIRE development test data for the
NIST 1999 IE-ER Evaluation.  The files were taken from the
subdirectory: ``/ie_er_99/english/devtest/newswire/*.ref.nwt``
and filenames were shortened.

The corpus contains the following files: APW_19980314, APW_19980424,
APW_19980429, NYT_19980315, NYT_19980403, and NYT_19980407.
    N)*z&Associated Press Weekly, 14 March 1998z&Associated Press Weekly, 24 April 1998z&Associated Press Weekly, 29 April 1998zNew York Times, 15 March 1998zNew York Times, 3 April 1998zNew York Times, 7 April 1998)APW_19980314APW_19980424APW_19980429NYT_19980315NYT_19980403NYT_19980407c                       e Zd ZddZd Zy)IEERDocumentNc                 J    || _         || _        || _        || _        || _        y N)textdocnodoctype	date_timeheadline)selfr   r   r   r   r   s         L/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/ieer.py__init__zIEERDocument.__init__+   s%    	
"     c                 L   | j                   r*dj                  | j                   j                               }nFdj                  | j                  j                         D cg c]  }|d d dk7  s| c}d d       dz   }| j                  d| j                   d|dS d	|z  S c c}w )
N    <   z...z<IEERDocument z: >z<IEERDocument: %r>)r   joinleavesr   r   )r   r   ws      r   __repr__zIEERDocument.__repr__2   s    ==xx 4 4 67H TYY%5%5%7H%71Ra5C<!%7H"MNQVV  ::!#DJJ<r(Q??'(22 Is   B!,B!)NNN )__name__
__module____qualname__r   r     r   r   r   r   *   s    !
3r   r   c                   2    e Zd ZdZddZddZd Zd Zd Zy)	IEERCorpusReaderr   Nc                     t        | j                  |d      D cg c]  \  }}t        || j                  |       c}}      S c c}}w NT)encoding)concatabspathsStreamBackedCorpusView_read_blockr   fileidsfileidencs       r   docszIEERCorpusReader.docsB   sO     &*]]7D%A%AMVS 'vt/?/?#N%A
 	
   "A
c                     t        | j                  |d      D cg c]  \  }}t        || j                  |       c}}      S c c}}w r)   )r+   r,   r-   _read_parsed_blockr/   s       r   parsed_docszIEERCorpusReader.parsed_docsJ   sP     &*]]7D%A%AMVS 'vt/F/FQTU%A
 	
r4   c                     | j                  |      D cg c].  }| j                  |      j                  | j                  |      0 c}S c c}w r   )r.   _parser   )r   streamdocs      r   r6   z#IEERCorpusReader._read_parsed_blockR   sR     ''/
/{{3%%1 KK/
 	
 
s   3A
c                     t         j                  j                  |d      }t        |t              rt        di |S t        |      S )NDOCUMENT)
root_labelr%   )nltkchunkieerstr2tree
isinstancedictr   )r   r;   vals      r   r9   zIEERCorpusReader._parseZ   s>    jj%%cj%Ac4 &#&&$$r   c                    g }	 |j                         }|sn|j                         dk(  rn(|j                  |       	 |j                         }|sn&|j                  |       |j                         dk(  rn9dj                  |      gS )Nz<DOC>z</DOC>
)readlinestripappendr   )r   r:   outlines       r   r.   zIEERCorpusReader._read_blocka   s    ??$Dzz|w&  	

4??$DJJtzz|x'  		#r   r   )	r"   r#   r$   __doc__r3   r7   r6   r9   r.   r%   r   r   r'   r'   ?   s    


% r   r'   )	rL   r?   nltk.corpus.reader.apititlessorted	documentsr   CorpusReaderr'   r%   r   r   <module>rR      sN     $
 =<<322
 6N	3 3*5 | 5 r   