
    g`                     F    d Z ddlZddlZddlmZ ddlmZ  G d de      Zy)z
An NLTK interface to the VerbNet verb lexicon

For details about VerbNet see:
https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
    N)defaultdict)XMLCorpusReaderc                   F   e Zd ZdZd"dZ ej                  d      Z	  ej                  d      Z	  ej                  d      Z		 d#dZ
d#dZd$d	Zd
 Zd#dZd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd%dZd%dZd%dZd%dZd%dZ d%dZ!d%dZ"d%d Z#d%d!Z$y)&VerbnetCorpusReadera  
    An NLTK interface to the VerbNet verb lexicon.

    From the VerbNet site: "VerbNet (VN) (Kipper-Schuler 2006) is the largest
    on-line verb lexicon currently available for English. It is a hierarchical
    domain-independent, broad-coverage verb lexicon with mappings to other
    lexical resources such as WordNet (Miller, 1990; Fellbaum, 1998), XTAG
    (XTAG Research Group, 2001), and FrameNet (Baker et al., 1998)."

    For details about VerbNet see:
    https://verbs.colorado.edu/~mpalmer/projects/verbnet.html
    c                     t        j                  | |||       t        t              | _        	 t        t              | _        	 i | _        	 i | _        | j                          y N)	r   __init__r   list_lemma_to_class_wordnet_to_class_class_to_fileid_shortid_to_longid_quick_index)selfrootfileids
wrap_etrees       O/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/verbnet.pyr	   zVerbnetCorpusReader.__init__%   sd      tWjA*40	& "-T!2	/ !#	B #%
 	    z([^\-\.]*)-([\d+.\-]+)$z
[\d+.\-]+$zH<MEMBER name="\??([^"]+)" wn="([^"]*)"[^>]+>|<VNSUBCLASS ID="([^"]+)"/?>Nc                     |#t        | j                  j                               S t        |t              r| j                  |      }|j                  d      D cg c]  }|j                  d       c}S c c}w )zx
        Return a list of all verb lemmas that appear in any class, or
        in the ``classid`` if specified.
        MEMBERS/MEMBERname)sortedr   keys
isinstancestrvnclassfindallget)r   r   members      r   lemmaszVerbnetCorpusReader.lemmasH   sj    
 ?$..33566 '3',,w/5<__EU5VW5V6FJJv&5VWWWs   A5c                     |#t        | j                  j                               S t        |t              r| j                  |      }t        d |j                  d      D        g       S )z|
        Return a list of all wordnet identifiers that appear in any
        class, or in ``classid`` if specified.
        c              3   \   K   | ]$  }|j                  d d      j                          & yw)wn N)r   split).0r    s     r   	<genexpr>z1VerbnetCorpusReader.wordnetids.<locals>.<genexpr>a   s,      "C JJtR(..0"Cs   *,r   )r   r   r   r   r   r   sumr   )r   r   s     r   
wordnetidszVerbnetCorpusReader.wordnetidsU   sf    
 ?$0055788 '3',,w/")//2B"C  r   c                    |3| j                   j                         D cg c]  \  }}||k(  s| c}}S || j                  |   S || j                  |   S |?| j	                  |      }|j                  d      D cg c]  }|j                  d       c}S t        | j                   j                               S c c}}w c c}w )a  
        Return a list of the VerbNet class identifiers.  If a file
        identifier is specified, then return only the VerbNet class
        identifiers for classes (and subclasses) defined by that file.
        If a lemma is specified, then return only VerbNet class
        identifiers for classes that contain that lemma as a member.
        If a wordnetid is specified, then return only identifiers for
        classes that contain that wordnetid as a member.  If a classid
        is specified, then return only identifiers for subclasses of
        the specified VerbNet class.
        If nothing is specified, return all classids within VerbNet
        SUBCLASSES/VNSUBCLASSID)	r   itemsr   r   r   r   r   r   r   )	r   lemma	wordnetidfileidclassidcfxmltreesubclasss	            r   classidszVerbnetCorpusReader.classidsh   s     $($9$9$?$?$AQ$A&1aQ&[A$AQQ''..")))44 ll7+G !(0G H HH T" H 
 $//44677 Rs   B<B<>Cc                    || j                   v r| j                  |      S | j                  |      }|| j                  v ru| j                  | j                  |         }| j                  |      }||j	                  d      k(  r|S |j                  d      D ]  }||j	                  d      k(  s|c S  J t        d|       )a  Returns VerbNet class ElementTree

        Return an ElementTree containing the xml for the specified
        VerbNet class.

        :param fileid_or_classid: An identifier specifying which class
            should be returned.  Can be a file identifier (such as
            ``'put-9.1.xml'``), or a VerbNet class identifier (such as
            ``'put-9.1'``) or a short VerbNet class identifier (such as
            ``'9.1'``).
        r-   z.//VNSUBCLASSzUnknown identifier )_fileidsxmllongidr   r   r   
ValueError)r   fileid_or_classidr2   r1   treer6   s         r   r   zVerbnetCorpusReader.vnclass   s     -88-.. ++/0d+++**4;;w+?@F88F#D$((4.( $_ =H(,,t"44' !> !5 23D2EFGGr   c                     || j                   S t        |t              r| j                  | j	                  |         gS |D cg c]   }| j                  | j	                  |         " c}S c c}w )z
        Return a list of fileids that make up this corpus.  If
        ``vnclass_ids`` is specified, then return the fileids that make
        up the specified VerbNet class(es).
        )r9   r   r   r   r;   )r   vnclass_ids
vnclass_ids      r   r   zVerbnetCorpusReader.fileids   sv     == S)))$++k*BCDD #."-J %%dkk*&=>"-  s   %A*c           	          t        |t              r| j                  |      }g }|j                  d      }|D ]T  }|j	                  | j                  |      | j                  |      | j                  |      | j                  |      d       V |S )ap  Given a VerbNet class, this method returns VerbNet frames

        The members returned are:
        1) Example
        2) Description
        3) Syntax
        4) Semantics

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: frames - a list of frame dictionaries
        zFRAMES/FRAME)exampledescriptionsyntax	semantics)	r   r   r   r   append_get_example_within_frame_get_description_within_frame _get_syntactic_list_within_frame_get_semantics_within_frame)r   r   framesvnframesvnframes        r   rL   zVerbnetCorpusReader.frames   s     gs#ll7+G??>2GMM#==gF#'#E#Eg#N"CCGL!%!A!A'!J	   r   c                     t        |t              r| j                  |      }|j                  d      D cg c]  }|j	                  d       }}|S c c}w )aA  Returns subclass ids, if any exist

        Given a VerbNet class, this method returns subclass ids (if they exist)
        in a list of strings.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: list of subclasses
        r,   r-   )r   r   r   r   r   )r   r   r6   
subclassess       r   rP   zVerbnetCorpusReader.subclasses   s[     gs#ll7+G 07?V/W
/W8HLL/W 	 
 
s   Ac                 D   t        |t              r| j                  |      }g }|j                  d      D ]c  }|j	                  |j                  d      |j                  d      D cg c]%  }|j                  d      |j                  d      d' c}d       e |S c c}w )ab  Returns thematic roles participating in a VerbNet class

        Members returned as part of roles are-
        1) Type
        2) Modifiers

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        :return: themroles: A list of thematic roles in the VerbNet class
        zTHEMROLES/THEMROLEtypeSELRESTRS/SELRESTRValuevaluerR   )rR   	modifiers)r   r   r   r   rG   r   )r   r   	themrolestrolerestrs        r   rX   zVerbnetCorpusReader.themroles   s     gs#ll7+G	__%9:E!IIf- &+]]3G%H"%HE #())G"4eii>OP%H" ; "s   &*Bc                 h    | j                   D ]#  }| j                  | j                  |      |       % y)aC  
        Initialize the indexes ``_lemma_to_class``,
        ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
        through the corpus fileids.  This is fast if ElementTree
        uses the C implementation (<0.1 secs), but quite slow (>10 secs)
        if only the python implementation is available.
        N)r9   _index_helperr:   )r   r1   s     r   _indexzVerbnetCorpusReader._index  s+     mmFtxx/8 $r   c                    |j                  d      }|| j                  |<   || j                  | j                  |      <   |j	                  d      D ]r  }| j
                  |j                  d         j                  |       |j                  dd      j                         D ]   }| j                  |   j                  |       " t |j	                  d      D ]  }| j                  ||        y)zHelper for ``_index()``r-   r   r   r$   r%   r,   N)
r   r   r   shortidr   r   rG   r&   r   r\   )r   r5   r1   r   r    r$   r6   s          r   r\   z!VerbnetCorpusReader._index_helper  s    ++d#)/g&9@W 56oo&67F  F!34;;GDjjr*002&&r*11': 3 8  (?@Hx0 Ar   c                    | j                   D ],  }|dd }|| j                  |<   || j                  | j                  |      <   | j	                  |      5 }| j
                  j                  |j                               D ]  }|j                         }|d   X| j                  |d      j                  |       |d   j                         D ]   }| j                  |   j                  |       " p|d   6|| j                  |d   <   |d   }|| j                  | j                  |      <   J d        	 ddd       / y# 1 sw Y   ;xY w)a  
        Initialize the indexes ``_lemma_to_class``,
        ``_wordnet_to_class``, and ``_class_to_fileid`` by scanning
        through the corpus fileids.  This doesn't do proper xml parsing,
        but is good enough to find everything in the standard VerbNet
        corpus -- and it runs about 30 times faster than xml parsing
        (with the python ElementTree; only 2-3 times faster
        if ElementTree uses the C implementation).
        Nr         zunexpected match condition)r9   r   r   r_   open	_INDEX_REfinditerreadgroupsr   rG   r&   r   )r   r1   r   fpmrh   r$   s          r   r   z VerbnetCorpusReader._quick_index  s/    mmFSbkG-3D!!'*=DD##DLL$9:6"b00;AXXZFay,,,VAY7>>wG"()//"3B 2226==gF #4.;A--fQi8"()IP//W0EFB&BBu < #"	 $ #"s   CD>>E	c                     | j                   j                  |      r|S | j                  j                  |      st        d|z        	 | j                  |   S # t
        $ r}t        d|z        |d}~ww xY w)zReturns longid of a VerbNet class

        Given a short VerbNet class identifier (eg '37.10'), map it
        to a long id (eg 'confess-37.10').  If ``shortid`` is already a
        long id, then return it as-isvnclass identifier %r not foundN)
_LONGID_REmatch_SHORTID_REr<   r   KeyError)r   r_   es      r   r;   zVerbnetCorpusReader.longid;  s|     ??  )N!!''0>HII	Q**733 	Q>HIqP	Qs   A 	A4 A//A4c                     | j                   j                  |      r|S | j                  j                  |      }|r|j                  d      S t	        d|z        )zReturns shortid of a VerbNet class

        Given a long VerbNet class identifier (eg 'confess-37.10'),
        map it to a short id (eg '37.10').  If ``longid`` is already a
        short id, then return it as-is.rc   rl   )ro   rn   rm   groupr<   )r   r;   rj   s      r   r_   zVerbnetCorpusReader.shortidJ  sS     !!&)MOO!!&)771:>GHHr   c                 *   g }|j                  d      D ]w  }|j                  d      D cg c]%  }|j                  d      |j                  d      d' }}|j                  |j                  d      ||j                  d      dk(  d       y |S c c}w )	an  Returns semantics within a single frame

        A utility function to retrieve semantics within a frame in VerbNet
        Members of the semantics dictionary:
        1) Predicate value
        2) Arguments

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: semantics: semantics dictionary
        zSEMANTICS/PREDzARGS/ARGrR   rV   )rR   rV   bool!)predicate_value	argumentsnegated)r   r   rG   )r   rN   semantics_within_single_framepredargrx   s         r   rK   z/VerbnetCorpusReader._get_semantics_within_frame\  s     )+%OO$45D  <<
33C 37773CD3   *00'+xx'8!*#xx/36 6 -,s   *Bc                 L    |j                  d      }||j                  }|S d}|S )a'  Returns example within a frame

        A utility function to retrieve an example within a frame in VerbNet.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: example_text: The example sentence for this particular frame
        zEXAMPLES/EXAMPLEr%   )findtext)r   rN   example_elementexample_texts       r   rH   z-VerbnetCorpusReader._get_example_within_framew  s9     ",,'9:&*//L  Lr   c                 h    |j                  d      }|j                  d   |j                  dd      dS )ab  Returns member description within frame

        A utility function to retrieve a description of participating members
        within a frame in VerbNet.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: description: a description dictionary with members - primary and secondary
        DESCRIPTIONprimary	secondaryr%   )r   r   )r~   attribr   )r   rN   description_elements      r   rI   z1VerbnetCorpusReader._get_description_within_frame  s;     &ll=9*11)<,00bA
 	
r   c                    g }|j                  d      D ]  }|j                  }t               }d|j                  v r|j	                  d      nd|d<   |j                  d      D cg c]%  }|j	                  d      |j	                  d      d' c}|d<   |j                  d	      D cg c]%  }|j	                  d      |j	                  d      d' c}|d
<   |j                  ||d        |S c c}w c c}w )a[  Returns semantics within a frame

        A utility function to retrieve semantics within a frame in VerbNet.
        Members of the syntactic dictionary:
        1) POS Tag
        2) Modifiers

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        :return: syntax_within_single_frame
        SYNTAXrV   r%   rS   rT   rR   rU   	selrestrszSYNRESTRS/SYNRESTR	synrestrs)pos_tagrW   )r~   tagdictr   r   r   rG   )r   rN   syntax_within_single_frameeltr   rW   rZ   s          r   rJ   z4VerbnetCorpusReader._get_syntactic_list_within_frame  s    &("<<)CggGI5<

5J!1PRIg ![[)=>&>E  ))G,eii6GH>&Ik" ![[)=>&>E  ))G,eii6GH>&Ik" '--#)< * *)&&s   $*C/'*C4c                 F   t        |t              r| j                  |      }|j                  d      dz   }|| j	                  |d      dz   z  }|| j                  |d      dz   z  }|dz  }|| j                  |d      dz   z  }|dz  }|| j                  |d      z  }|S )a%  Returns pretty printed version of a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r-   
  )indentz  Thematic roles:
    z
  Frames:
)r   r   r   r   pprint_subclassespprint_memberspprint_themrolespprint_frames)r   r   ss      r   pprintzVerbnetCorpusReader.pprint  s     gs#ll7+GKK$	T##GD#9D@@	T   6==	""	T""76":TAA	]	T77r   c                     t        |t              r| j                  |      }| j                  |      }|sdg}ddj	                  |      z   }t        j                  |d||dz         S )a>  Returns pretty printed version of subclasses of VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's subclasses.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        (none)zSubclasses:  F   r   initial_indentsubsequent_indent)r   r   r   rP   jointextwrapfill)r   r   r   rP   r   s        r   r   z%VerbnetCorpusReader.pprint_subclasses  se     gs#ll7+G__W-
"JSXXj11}}r&FTM
 	
r   c                     t        |t              r| j                  |      }| j                  |      }|sdg}ddj	                  |      z   }t        j                  |d||dz         S )a?  Returns pretty printed version of members in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's member verbs.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r   z	Members: r   r   r   r   )r   r   r   r!   r   r   r   )r   r   r   membersr   s        r   r   z"VerbnetCorpusReader.pprint_members  se     gs#ll7+G++g&jG#((7++}}r&FTM
 	
r   c                 p   t        |t              r| j                  |      }g }| j                  |      D ]j  }|dz   |j	                  d      z   }|d   D cg c]  }|d   |d   z    }}|r#|dj                  dj                  |            z  }|j                  |       l dj                  |      S c c}w )aH  Returns pretty printed version of thematic roles in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet class's thematic roles.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        * rR   rW   rV   [{}]r   r   )r   r   r   rX   r   formatr   rG   )r   r   r   piecesthemrolepiecemodifierrW   s           r   r   z$VerbnetCorpusReader.pprint_themroles  s     gs#ll7+Gw/HTMHLL$88E !) 5 5H !HV$44 5   sxx	':;;MM%  0 yy  s   B3c                     t        |t              r| j                  |      }g }| j                  |      D ]#  }|j	                  | j                  ||             % dj                  |      S )a?  Returns pretty version of all frames in a VerbNet class

        Return a string containing a pretty-printed representation of
        the list of frames within the VerbNet class.

        :param vnclass: A VerbNet class identifier; or an ElementTree
            containing the xml contents of a VerbNet class.
        r   )r   r   r   rL   rG   _pprint_single_framer   )r   r   r   r   rN   s        r   r   z!VerbnetCorpusReader.pprint_frames  s\     gs#ll7+G{{7+GMM$33GVDE ,yy  r   c                     | j                  ||      dz   }|| j                  ||dz         dz   z  }|| j                  ||dz         dz   z  }||dz   z  }|| j                  ||dz         z  }|S )a  Returns pretty printed version of a single frame in a VerbNet class

        Returns a string containing a pretty-printed representation of
        the given frame.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        r   r   z
  Syntax: z  Semantics:
r   ) _pprint_description_within_frame_pprint_example_within_frame_pprint_syntax_within_frame_pprint_semantics_within_frame)r   rN   r   frame_strings       r   r   z(VerbnetCorpusReader._pprint_single_frame  s     <<WfMPTT99'6C<PSWWW,,Wf|6KLtS	
 	!111;;GVf_UUr   c                 $    |d   r|dz   |d   z   S y)a&  Returns pretty printed version of example within frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame example.

        :param vnframe: An ElementTree containing the xml contents of
            a Verbnet frame.
        rC   z
 Example: N )r   rN   r   s      r   r   z0VerbnetCorpusReader._pprint_example_within_frame0  s&     9L(79+=== r   c                 `    ||d   d   z   }|d   d   r|dj                  |d   d         z  }|S )a  Returns pretty printed version of a VerbNet frame description

        Return a string containing a pretty-printed representation of
        the given VerbNet frame description.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        rD   r   r   z ({}))r   )r   rN   r   rD   s       r   r   z4VerbnetCorpusReader._pprint_description_within_frame<  sH     w}5i@@=!+.7>>'-*@*MNNKr   c           
         g }|d   D ]  }|d   }g }d|d   v r|d   d   r|j                  |d   d          ||d   d   |d   d   z   D cg c]  }dj                  |d   |d          c}z  }|r#|d	j                  d
j                  |            z  }|j                  |        |d
j                  |      z   S c c}w )a&  Returns pretty printed version of syntax within a frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame syntax.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        rE   r   rV   rW   r   r   z{}{}rR   r   r   )rG   r   r   )r   rN   r   r   elementr   modifier_listrZ   s           r   r   z/VerbnetCorpusReader._pprint_syntax_within_frameJ  s     x(GI&EM'+..7;3G3P$$W[%9'%BC K(5k*;788  eGneFm<8 M sxx'>??MM%  )  (((s   B<c           	          g }|d   D ]I  }|d   D cg c]  }|d   	 }}|j                  |d   rdnd |d    dd	j                  |       d
       K dj                  fd|D              S c c}w )a,  Returns a pretty printed version of semantics within frame in a VerbNet class

        Return a string containing a pretty-printed representation of
        the given VerbNet frame semantics.

        :param vnframe: An ElementTree containing the xml contents of
            a VerbNet frame.
        rF   rx   rV   ry      ¬r%   rw   (z, )r   c              3   ,   K   | ]  } d |   yw)r   Nr   )r'   r   r   s     r   r(   zEVerbnetCorpusReader._pprint_semantics_within_frame.<locals>.<genexpr>u  s     B6%F82eW-6s   )rG   r   )r   rN   r   r   	predicateargumentrx   s     `    r   r   z2VerbnetCorpusReader._pprint_semantics_within_framef  s      -I;D[;QR;Qx'*;QIRMM$Y/4R8CT9U8VVWX\XaXabkXlWmmno .
 yyB6BBB	 Ss   A/)Fr   )NNNN)r%   )%__name__
__module____qualname____doc__r	   recompilerm   ro   re   r!   r*   r7   r   r   rL   rP   rX   r]   r\   r   r;   r_   rK   rH   rI   rJ   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r      s    . 67JB"**]+K2

WI!X&88HB 8$>	9
1CDQI$-6 
 *D*
(
(!0! $
>)8Cr   r   )r   r   r   collectionsr   nltk.corpus.reader.xmldocsr   r   r   r   r   <module>r      s(    
  # 6_	C/ _	Cr   