Ë
    Øªg¹P  ã                   ó<  — d dl Z d dlmZ d dlmZ d dlmZ d dlm	Z	 d„ Z G d„ d«      Z
d	„ Z	 dd„Z e j                  d«      Zdd„Zd„ Z	 dd„Zd„ Z e j                  de j&                  «      Z e j                  d«      Zd„ Zg d¢d
fd„Zd„ Zedk(  r e«        yy)é    N)Úaccuracy)Úmap_tag)Ú	str2tuple)ÚTreec                 ó¦   — g }g }|D ]=  }| j                  |j                  «       «      }|t        |«      z  }|t        |«      z  }Œ? t        ||«      S )a|  
    Score the accuracy of the chunker against the gold standard.
    Strip the chunk information from the gold standard and rechunk it using
    the chunker, then compute the accuracy score.

    :type chunker: ChunkParserI
    :param chunker: The chunker being evaluated.
    :type gold: tree
    :param gold: The chunk structures to score the chunker on.
    :rtype: float
    )ÚparseÚflattenÚtree2conlltagsÚ	_accuracy)ÚchunkerÚgoldÚ	gold_tagsÚ	test_tagsÚ	gold_treeÚ	test_trees         úD/var/www/openai/venv/lib/python3.12/site-packages/nltk/chunk/util.pyr   r      s\   € ð €IØ€IÛˆ	Ø—M‘M )×"3Ñ"3Ó"5Ó6ˆ	Ø”^ IÓ.Ñ.ˆ	Ø”^ IÓ.Ñ.‰	ð ô Y 	Ó*Ð*ó    c                   óf   — e Zd ZdZd„ Zd„ Zd„ Zd„ Zd„ Zd„ Z	dd„Z
d	„ Zd
„ Zd„ Zd„ Zd„ Zd„ Zd„ Zy)Ú
ChunkScorea;  
    A utility class for scoring chunk parsers.  ``ChunkScore`` can
    evaluate a chunk parser's output, based on a number of statistics
    (precision, recall, f-measure, misssed chunks, incorrect chunks).
    It can also combine the scores from the parsing of multiple texts;
    this makes it significantly easier to evaluate a chunk parser that
    operates one sentence at a time.

    Texts are evaluated with the ``score`` method.  The results of
    evaluation can be accessed via a number of accessor methods, such
    as ``precision`` and ``f_measure``.  A typical use of the
    ``ChunkScore`` class is::

        >>> chunkscore = ChunkScore()           # doctest: +SKIP
        >>> for correct in correct_sentences:   # doctest: +SKIP
        ...     guess = chunkparser.parse(correct.leaves())   # doctest: +SKIP
        ...     chunkscore.score(correct, guess)              # doctest: +SKIP
        >>> print('F Measure:', chunkscore.f_measure())       # doctest: +SKIP
        F Measure: 0.823

    :ivar kwargs: Keyword arguments:

        - max_tp_examples: The maximum number actual examples of true
          positives to record.  This affects the ``correct`` member
          function: ``correct`` will not return more than this number
          of true positive examples.  This does *not* affect any of
          the numerical metrics (precision, recall, or f-measure)

        - max_fp_examples: The maximum number actual examples of false
          positives to record.  This affects the ``incorrect`` member
          function and the ``guessed`` member function: ``incorrect``
          will not return more than this number of examples, and
          ``guessed`` will not return more than this number of true
          positive examples.  This does *not* affect any of the
          numerical metrics (precision, recall, or f-measure)

        - max_fn_examples: The maximum number actual examples of false
          negatives to record.  This affects the ``missed`` member
          function and the ``correct`` member function: ``missed``
          will not return more than this number of examples, and
          ``correct`` will not return more than this number of true
          negative examples.  This does *not* affect any of the
          numerical metrics (precision, recall, or f-measure)

        - chunk_label: A regular expression indicating which chunks
          should be compared.  Defaults to ``'.*'`` (i.e., all chunks).

    :type _tp: list(Token)
    :ivar _tp: List of true positives
    :type _fp: list(Token)
    :ivar _fp: List of false positives
    :type _fn: list(Token)
    :ivar _fn: List of false negatives

    :type _tp_num: int
    :ivar _tp_num: Number of true positives
    :type _fp_num: int
    :ivar _fp_num: Number of false positives
    :type _fn_num: int
    :ivar _fn_num: Number of false negatives.
    c                 ó´  — t        «       | _        t        «       | _        t        «       | _        t        «       | _        t        «       | _        |j                  dd«      | _        |j                  dd«      | _        |j                  dd«      | _	        |j                  dd«      | _
        d| _        d| _        d| _        d| _        d| _        d| _        d	| _        y )
NÚmax_tp_exampleséd   Úmax_fp_examplesÚmax_fn_examplesÚchunk_labelz.*r   g        F)ÚsetÚ_correctÚ_guessedÚ_tpÚ_fpÚ_fnÚgetÚ_max_tpÚ_max_fpÚ_max_fnÚ_chunk_labelÚ_tp_numÚ_fp_numÚ_fn_numÚ_countÚ_tags_correctÚ_tags_totalÚ_measuresNeedUpdate)ÚselfÚkwargss     r   Ú__init__zChunkScore.__init__r   s¯   € Ü›ˆŒÜ›ˆŒÜ“5ˆŒÜ“5ˆŒÜ“5ˆŒØ—z‘zÐ"3°SÓ9ˆŒØ—z‘zÐ"3°SÓ9ˆŒØ—z‘zÐ"3°SÓ9ˆŒØ"ŸJ™J }°dÓ;ˆÔØˆŒØˆŒØˆŒØˆŒØ ˆÔØˆÔà#(ˆÕ r   c                 ó|  — | j                   r°| j                  | j                  z  | _        | j                  | j                  z
  | _        | j                  | j                  z
  | _        t        | j                  «      | _        t        | j
                  «      | _        t        | j                  «      | _	        d| _         y y )NF)
r-   r   r   r   r!   r    Úlenr'   r(   r)   ©r.   s    r   Ú_updateMeasureszChunkScore._updateMeasures…   s   € Ø×#Ò#Ø—}‘} t§}¡}Ñ4ˆDŒHØ—}‘} t§}¡}Ñ4ˆDŒHØ—}‘} t§}¡}Ñ4ˆDŒHÜ˜tŸx™x›=ˆDŒLÜ˜tŸx™x›=ˆDŒLÜ˜tŸx™x›=ˆDŒLØ',ˆDÕ$ð $r   c           	      óú  — | xj                   t        || j                  | j                  «      z  c_         | xj                  t        || j                  | j                  «      z  c_        | xj                  dz  c_        d| _        	 t        |«      }t        |«      }| xj                  t        |«      z  c_        | xj                  t        d„ t        ||«      D «       «      z  c_
        y# t        $ r dx}}Y Œ]w xY w)aU  
        Given a correctly chunked sentence, score another chunked
        version of the same sentence.

        :type correct: chunk structure
        :param correct: The known-correct ("gold standard") chunked
            sentence.
        :type guessed: chunk structure
        :param guessed: The chunked sentence to be scored.
        é   T© c              3   ó2   K  — | ]  \  }}||k(  sŒd –— Œ y­w)r6   Nr7   )Ú.0ÚtÚgs      r   Ú	<genexpr>z#ChunkScore.score.<locals>.<genexpr>¨   s   è ø€ ð "
Ù;‘&1a¸qÀA»vŒAÑ;ùs   ‚N)r   Ú
_chunksetsr*   r&   r   r-   r
   Ú
ValueErrorr,   r2   r+   ÚsumÚzip)r.   ÚcorrectÚguessedÚcorrect_tagsÚguessed_tagss        r   ÚscorezChunkScore.score   sÖ   € ð 	Šœ G¨T¯[©[¸$×:KÑ:KÓLÑLØŠœ G¨T¯[©[¸$×:KÑ:KÓLÑLØŠqÑØ#'ˆÔ ð	-Ü)¨'Ó2ˆLÜ)¨'Ó2ˆLð 	×ÒœC Ó-Ñ-ÕØ×Òœcñ "
Ü ¨lÔ;ó"
ó 
ñ 	
Öøô ò 	-ð +-Ð,ˆLš<ð		-ús   ÂC* Ã*C:Ã9C:c                 óT   — | j                   dk(  ry| j                  | j                   z  S )zÁ
        Return the overall tag-based accuracy for all text that have
        been scored by this ``ChunkScore``, using the IOB (conll2000)
        tag encoding.

        :rtype: float
        r   r6   )r,   r+   r3   s    r   r   zChunkScore.accuracy¬   s,   € ð ×Ñ˜qÒ ØØ×!Ñ! D×$4Ñ$4Ñ4Ð4r   c                 ó~   — | j                  «        | j                  | j                  z   }|dk(  ry| j                  |z  S )z‰
        Return the overall precision for all texts that have been
        scored by this ``ChunkScore``.

        :rtype: float
        r   )r4   r'   r(   ©r.   Údivs     r   Ú	precisionzChunkScore.precision¸   ó;   € ð 	×ÑÔØl‰l˜TŸ\™\Ñ)ˆØ!Š8Øà—<‘< #Ñ%Ð%r   c                 ó~   — | j                  «        | j                  | j                  z   }|dk(  ry| j                  |z  S )z†
        Return the overall recall for all texts that have been
        scored by this ``ChunkScore``.

        :rtype: float
        r   ©r4   r'   r)   rH   s     r   ÚrecallzChunkScore.recallÆ   rK   r   c                 óš   — | j                  «        | j                  «       }| j                  «       }|dk(  s|dk(  ryd||z  d|z
  |z  z   z  S )a»  
        Return the overall F measure for all texts that have been
        scored by this ``ChunkScore``.

        :param alpha: the relative weighting of precision and recall.
            Larger alpha biases the score towards the precision value,
            while smaller alpha biases the score towards the recall
            value.  ``alpha`` should have a value in the range [0,1].
        :type alpha: float
        :rtype: float
        r   r6   )r4   rJ   rN   )r.   ÚalphaÚpÚrs       r   Ú	f_measurezChunkScore.f_measureÔ   sS   € ð 	×ÑÔØN‰NÓˆØK‰K‹MˆØŠ6Q˜!’VØØE˜A‘I  U¡¨a¡Ñ/Ñ0Ð0r   c                 ó|   — | j                  «        t        | j                  «      }|D cg c]  }|d   ‘Œ	 c}S c c}w )zÈ
        Return the chunks which were included in the
        correct chunk structures, but not in the guessed chunk
        structures, listed in input order.

        :rtype: list of chunks
        r6   )r4   Úlistr!   ©r.   ÚchunksÚcs      r   ÚmissedzChunkScore.missedç   s9   € ð 	×ÑÔÜd—h‘h“ˆÙ$Ó%™f˜!“˜fÑ%Ð%ùÒ%ó   ª9c                 ó|   — | j                  «        t        | j                  «      }|D cg c]  }|d   ‘Œ	 c}S c c}w )zÀ
        Return the chunks which were included in the guessed chunk structures,
        but not in the correct chunk structures, listed in input order.

        :rtype: list of chunks
        r6   )r4   rU   r    rV   s      r   Ú	incorrectzChunkScore.incorrectó   s9   € ð 	×ÑÔÜd—h‘h“ˆÙ$Ó%™f˜!“˜fÑ%Ð%ùÒ%rZ   c                 ó\   — t        | j                  «      }|D cg c]  }|d   ‘Œ	 c}S c c}w )z—
        Return the chunks which were included in the correct
        chunk structures, listed in input order.

        :rtype: list of chunks
        r6   )rU   r   rV   s      r   rA   zChunkScore.correctþ   ó.   € ô d—m‘mÓ$ˆÙ$Ó%™f˜!“˜fÑ%Ð%ùÒ%ó   š)c                 ó\   — t        | j                  «      }|D cg c]  }|d   ‘Œ	 c}S c c}w )z—
        Return the chunks which were included in the guessed
        chunk structures, listed in input order.

        :rtype: list of chunks
        r6   )rU   r   rV   s      r   rB   zChunkScore.guessed  r^   r_   c                 óT   — | j                  «        | j                  | j                  z   S )NrM   r3   s    r   Ú__len__zChunkScore.__len__  s!   € Ø×ÑÔØ|‰|˜dŸl™lÑ*Ð*r   c                 ó6   — dt        t        | «      «      z   dz   S )z`
        Return a concise representation of this ``ChunkScoring``.

        :rtype: str
        z<ChunkScoring of z chunks>)Úreprr2   r3   s    r   Ú__repr__zChunkScore.__repr__  s   € ð #¤T¬#¨d«)£_Ñ4°zÑAÐAr   c                 óÎ   — dd| j                  «       dz  d›dz   d| j                  «       dz  d›dz   d| j                  «       dz  d›dz   d| j                  «       dz  d›d	z   S )
a-  
        Return a verbose representation of this ``ChunkScoring``.
        This representation includes the precision, recall, and
        f-measure scores.  For other information about the score,
        use the accessor methods (e.g., ``missed()`` and ``incorrect()``).

        :rtype: str
        zChunkParse score:
z    IOB Accuracy: r   z5.1fz%%
z    Precision:    z    Recall:       z    F-Measure:    z%%)r   rJ   rN   rS   r3   s    r   Ú__str__zChunkScore.__str__  s’   € ð "Ø# D§M¡M£O°cÑ$9¸$Ð#?¸tÐDñFà# D§N¡NÓ$4°sÑ$:¸4Ð#@ÀÐEñGð $ D§K¡K£M°CÑ$7¸Ð#=¸TÐBñDð $ D§N¡NÓ$4°sÑ$:¸4Ð#@ÀÐCñ	Eð	
r   N)g      à?)Ú__name__Ú
__module__Ú__qualname__Ú__doc__r0   r4   rE   r   rJ   rN   rS   rY   r\   rA   rB   rb   re   rg   r7   r   r   r   r   3   sO   „ ñ<ò|)ò&-ò
ò:
5ò&ò&ó1ò&
&ò	&ò&ò&ò+òBó
r   r   c                 ó   — d}g }| D ]{  }t        |t        «      rdt        j                  ||j	                  «       «      r#|j                  ||f|j                  «       f«       |t        |j                  «       «      z  }Œw|dz  }Œ} t        |«      S )Nr   r6   )
Ú
isinstancer   ÚreÚmatchÚlabelÚappendÚfreezer2   Úleavesr   )r:   Úcountr   ÚposrW   Úchilds         r   r=   r=   2  sy   € Ø
€CØ€FÛˆÜeœTÔ"Üx‰x˜ U§[¡[£]Ô3Ø—‘  s˜|¨U¯\©\«^Ð<Ô=Ø”3u—|‘|“~Ó&Ñ&‰Cà1‰H‰Cð ô ˆv‹;Ðr   ÚSc                 óØ  — t        j                  d«      }t        |g «      g}|j                  | «      D ]	  }|j	                  «       }	|	d   dk(  r]t        |«      dk7  rt        d|j                  «       d›«      ‚t        |g «      }
|d   j                  |
«       |j                  |
«       Œy|	d   dk(  r<t        |«      d	k7  rt        d
|j                  «       d›«      ‚|j                  «        Œ½|€|d   j                  |	«       ŒÔt        |	|«      \  }}|r|rt        |||«      }|d   j                  ||f«       Œ t        |«      dk7  rt        dt        | «      d›«      ‚|d   S )aB  
    Divide a string of bracketted tagged text into
    chunks and unchunked tokens, and produce a Tree.
    Chunks are marked by square brackets (``[...]``).  Words are
    delimited by whitespace, and each word should have the form
    ``text/tag``.  Words that do not contain a slash are
    assigned a ``tag`` of None.

    :param s: The string to be converted
    :type s: str
    :param chunk_label: The label to use for chunk nodes
    :type chunk_label: str
    :param root_label: The label to use for the root of the tree
    :type root_label: str
    :rtype: Tree
    z\[|\]|[^\[\]\s]+r   Ú[r6   zUnexpected [ at char ÚdéÿÿÿÿÚ]é   zUnexpected ] at char zExpected ] at char )rn   Úcompiler   ÚfinditerÚgroupr2   r>   Ústartrq   Úpopr   r   )Úsr   Ú
root_labelÚsepÚsource_tagsetÚtarget_tagsetÚWORD_OR_BRACKETÚstackro   ÚtextÚchunkÚwordÚtags                r   Útagstr2treerŽ   ?  sX  € ô( —j‘jÐ!4Ó5€Oä*˜bÓ!Ð"€EØ ×)Ñ)¨!×,ˆØ{‰{‹}ˆØ‰7cŠ>Ü5‹z˜QŠÜ Ð#8¸¿¹»ÀqÐ8IÐ!JÓKÐKÜ˜ bÓ)ˆEØ"‰I×Ñ˜UÔ#ØL‰L˜ÕØ!‰W˜Š^Ü5‹z˜QŠÜ Ð#8¸¿¹»ÀqÐ8IÐ!JÓKÐKØI‰IKàˆ{Øb‘	× Ñ  Õ&ä% d¨CÓ0‘	cÙ ¡]Ü! -°ÀÓDCØb‘	× Ñ  $¨ Ö-ð' -ô* ˆ5ƒzQ‚ÜÐ.¬s°1«v°a¨jÐ9Ó:Ð:Ø‰8€Or   z(\S+)\s+(\S+)\s+([IOB])-?(\S+)?c                 ó0  — t        |g «      g}t        | j                  d«      «      D ]è  \  }}|j                  «       sŒt        j                  |«      }|€t        d|d›«      ‚|j                  «       \  }}}	}
||
|vrd}	|	dk(  xr |
|d   j                  «       k7  }|	dv s|rt        |«      dk(  r|j                  «        |	d	k(  s|r1t        |
g «      }|d   j                  |«       |j                  |«       |d   j                  ||f«       Œê |d
   S )a*  
    Return a chunk structure for a single sentence
    encoded in the given CONLL 2000 style string.
    This function converts a CoNLL IOB string into a tree.
    It uses the specified chunk types
    (defaults to NP, PP and VP), and creates a tree rooted at a node
    labeled S (by default).

    :param s: The CoNLL string to be converted.
    :type s: str
    :param chunk_types: The chunk types to be converted.
    :type chunk_types: tuple
    :param root_label: The node label to use for the root.
    :type root_label: str
    :rtype: Tree
    Ú
zError on line rz   ÚOÚIr{   ÚBOr}   ÚBr   )r   Ú	enumerateÚsplitÚstripÚ_LINE_REro   r>   Úgroupsrp   r2   r‚   rq   )rƒ   Úchunk_typesr„   r‰   ÚlinenoÚlinero   rŒ   r   ÚstateÚ
chunk_typeÚ
mismatch_Ir‹   s                r   Úconllstr2treer    u  s   € ô$ *˜bÓ!Ð"€Eä! !§'¡'¨$£-Ö0‰ˆØz‰zŒ|Øô —‘˜tÓ$ˆØˆ=Ü˜~¨f°Q¨ZÐ8Ó9Ð9Ø).¯©«Ñ&ˆˆsE˜:ð Ð" z¸Ñ'DØˆEð ˜c‘\ÒE j°E¸"±I·O±OÓ4EÑ&Eˆ
ØD‰=™JÜ5‹z˜QŠØ—	‘	”ð CŠ<™:Ü˜ RÓ(ˆEØ"‰I×Ñ˜UÔ#ØL‰L˜Ôð 	ˆb‰	×Ñ˜$ ˜Õ%ð9 1ð< ‰8€Or   c                 ó  — g }| D ]V  }	 |j                  «       }d}|D ]<  }t        |t        «      rt        d«      ‚|j	                  |d   |d   ||z   f«       d}Œ> ŒX |S # t
        $ r |j	                  |d   |d   df«       Y Œ€w xY w)zË
    Return a list of 3-tuples containing ``(word, tag, IOB-tag)``.
    Convert a tree to the CoNLL IOB tag format.

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: list(tuple)
    úB-z7Tree is too deeply nested to be printed in CoNLL formatr   r6   úI-r‘   )rp   rm   r   r>   rq   ÚAttributeError)r:   Útagsrv   ÚcategoryÚprefixÚcontentss         r   r
   r
   ª  s¦   € ð €DÛˆð	3Ø—{‘{“}ˆHØˆFÛ!Ü˜h¬Ô-Ü$ØQóð ð —‘˜X a™[¨(°1©+°vÀÑ7HÐIÔJØ‘ñ "ð	 ð €Køô ò 	3ØK‰K˜˜q™ 5¨¡8¨SÐ1Ö2ð	3ús   ‰AA Á #BÂBc                 óR  — t        |g «      }| D ]  \  }}}|€!|rt        d«      ‚|j                  ||f«       Œ+|j                  d«      r"|j                  t        |dd ||fg«      «       Œ^|j                  d«      r€t	        |«      dk(  s,t        |d   t         «      r|d   j                  «       |dd k7  r/|rt        d«      ‚|j                  t        |dd ||fg«      «       ŒØ|d   j                  ||f«       Œï|dk(  r|j                  ||f«       Œ	t        d	|›«      ‚ |S )
z1
    Convert the CoNLL IOB format to a tree.
    NzBad conll tag sequencer¢   r}   r£   r   r{   r‘   zBad conll tag )r   r>   rq   Ú
startswithr2   rm   rp   )Úsentencerš   r„   ÚstrictÚtreerŒ   ÚpostagÚchunktags           r   Úconlltags2treer°   Å  s+  € ô 
˜BÓ€DÜ"*ÑˆˆfhØÐÙÜ Ð!9Ó:Ð:ð —‘˜T 6˜NÕ+Ø× Ñ  Ô&ØK‰Kœ˜X a b˜\¨T°6¨NÐ+;Ó<Õ=Ø× Ñ  Ô&äD“	˜Q’Ü! $ r¡(¬DÔ1Ø˜‘8—>‘>Ó# x°° |Ò3áÜ$Ð%=Ó>Ð>ð —K‘K¤ X¨a¨b \°T¸6°NÐ3CÓ DÕEàR‘—‘  v Õ/Ø˜Š_ØK‰K˜˜v˜Ö'ä˜~¨h¨\Ð:Ó;Ð;ð3 #+ð4 €Kr   c                 ó|   — t        | «      D cg c]  }dj                  |«      ‘Œ }}dj                  |«      S c c}w )zÒ
    Return a multiline string where each line contains a word, tag and IOB tag.
    Convert a tree to the CoNLL IOB string format

    :param t: The tree to be converted.
    :type t: Tree
    :rtype: str
    Ú r   )r
   Újoin)r:   ÚtokenÚliness      r   Útree2conllstrr¶   é  s;   € ô +9¸Ô*;Ó<Ñ*; ˆSX‰Xe_Ð*;€EÐ<Ø9‰9UÓÐùò =s   Ž9a   <DOC>\s*(<DOCNO>\s*(?P<docno>.+?)\s*</DOCNO>\s*)?(<DOCTYPE>\s*(?P<doctype>.+?)\s*</DOCTYPE>\s*)?(<DATE_TIME>\s*(?P<date_time>.+?)\s*</DATE_TIME>\s*)?<BODY>\s*(<HEADLINE>\s*(?P<headline>.+?)\s*</HEADLINE>\s*)?<TEXT>(?P<text>.*?)</TEXT>\s*</BODY>\s*</DOC>\s*z#<b_\w+\s+[^>]*?type="(?P<type>\w+)"c                 óz  — t        |g «      g}| €g S t        j                  d| «      D ]¾  }|j                  «       }	 |j	                  d«      rdt
        j                  |«      }|€t        d|«       t        |j                  d«      g «      }|d   j                  |«       |j                  |«       n6|j	                  d«      r|j                  «        n|d   j                  |«       ŒÀ t        |«      d
k7  rt        d«      ‚|d   S # t        t        f$ r$}t        d|j                  «       d›d	«      |‚d }~ww xY w)Nz<[^>]+>|[^\s<]+z<b_ÚXXXXÚtyper{   z<e_z$Bad IEER string (error at character rz   Ú)r6   zBad IEER stringr   )r   rn   r   r€   rª   Ú_IEER_TYPE_REro   Úprintrq   r‚   Ú
IndexErrorr>   r   r2   )rƒ   r„   r‰   Úpiece_mÚpieceÚmr‹   Úes           r   Ú_ieer_read_textrÂ     s,  € Ü*˜bÓ!Ð"€Eð 	€yØˆ	Ü—;‘;Ð1°1Ö5ˆØ—‘“ˆð	Ø×Ñ Ô&Ü!×'Ñ'¨Ó.Ø9Ü˜& %Ô(Ü˜QŸW™W V›_¨bÓ1Øb‘	× Ñ  Ô'Ø—‘˜UÕ#Ø×!Ñ! %Ô(Ø—	‘	•ð
 b‘	× Ñ  Ô'øð! 6ô* ˆ5ƒzQ‚ÜÐ*Ó+Ð+Ø‰8€Oøô œJÐ'ò 	ÜØ6°w·}±}³ÀqÐ6IÈÐKóàðûð	ús   ¼B+DÄD:ÄD5Ä5D:)	ÚLOCATIONÚORGANIZATIONÚPERSONÚDURATIONÚDATEÚCARDINALÚPERCENTÚMONEYÚMEASUREc           	      ó  — t         j                  | «      }|rgt        |j                  d«      |«      |j                  d«      |j                  d«      |j                  d«      t        |j                  d«      |«      dœS t        | |«      S )ap  
    Return a chunk structure containing the chunked tagged text that is
    encoded in the given IEER style string.
    Convert a string of chunked tagged text in the IEER named
    entity format into a chunk structure.  Chunks are of several
    types, LOCATION, ORGANIZATION, PERSON, DURATION, DATE, CARDINAL,
    PERCENT, MONEY, and MEASURE.

    :rtype: Tree
    rŠ   ÚdocnoÚdoctypeÚ	date_timeÚheadline)rŠ   rÍ   rÎ   rÏ   rÐ   )Ú_IEER_DOC_REro   rÂ   r€   )rƒ   rš   r„   rÀ   s       r   Úieerstr2treerÒ   '  s{   € ô8 	×Ñ˜1Ó€AÙä# A§G¡G¨F£O°ZÓ@Ø—W‘W˜WÓ%Ø—w‘w˜yÓ)ØŸ™ Ó-ô (¨¯©°
Ó(;¸ZÓHñ
ð 	
ô ˜q *Ó-Ð-r   c                  ó.  — d} dd l }|j                  j                  | d¬«      }|j                  «        t	        «        d} t        | d¬«      }|j                  «        t	        d«       t	        |j                  j                  |«      «       t	        «        y )	Nzd[ Pierre/NNP Vinken/NNP ] ,/, [ 61/CD years/NNS ] old/JJ ,/, will/MD join/VB [ the/DT board/NN ] ./.r   ÚNP)r   av  
These DT B-NP
research NN I-NP
protocols NNS I-NP
offer VBP B-VP
to TO B-PP
the DT B-NP
patient NN I-NP
not RB O
only RB O
the DT B-NP
very RB I-NP
best JJS I-NP
therapy NN I-NP
which WDT B-NP
we PRP B-NP
have VBP B-VP
established VBN I-VP
today NN B-NP
but CC B-NP
also RB I-NP
the DT B-NP
hope NN I-NP
of IN B-PP
something NN B-NP
still RB B-ADJP
better JJR I-ADJP
. . O
)rÔ   ÚPP)rš   zCoNLL output:)Únltkr‹   rŽ   Úpprintr¼   r    r¶   )rƒ   rÖ   r:   Ú
conll_trees       r   ÚdemorÙ   R  sx   € Øn€AÛà
‰
×Ñ˜q¨dÐÓ3€AØ‡HH„JÜ	„Gð	€Aô< ˜q¨lÔ;€JØ×ÑÔô 
ˆ/ÔÜ	ˆ$*‰*×
"Ñ
" :Ó
.Ô/Ü	…Gr   Ú__main__)rÔ   rw   Ú/NN)©rÔ   rÕ   ÚVPrw   )rÜ   rw   F)rn   Únltk.metricsr   r   Únltk.tag.mappingr   Únltk.tag.utilr   Ú	nltk.treer   r   r=   rŽ   r~   r˜   r    r
   r°   r¶   ÚDOTALLrÑ   r»   rÂ   rÒ   rÙ   rh   r7   r   r   Ú<module>rã      sÍ   ðó 
å .Ý $Ý #Ý ò+÷<z
ñ z
ò~
ð UYó.ðf ˆ2:‰:Ð8Ó9€ó2òjð8 FKó!òH
ð ˆrz‰zðð ‡IIó
€ð —
‘
ÐAÓB€òòD
ð ó(.òV,ð^ ˆzÒÙ…Fð r   