
    geC                         d dl Z d dlmZ d dlmZ d dl d dl d dlmZ d dl	m
Z
  G d de      Z G d	 d
      Z G d d      Z G d de      Z G d de      Ze G d de             Z G d d      Zy)    N)total_ordering)ElementTree)*)raise_unorderable_types)Treec                   P    e Zd ZdZ	 	 	 	 	 ddZddZd Zd ZddZd Z	d	 fd
Z
y)PropbankCorpusReadera  
    Corpus reader for the propbank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every verb instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of "frameset
    files" which define the argument labels used by the annotations,
    on a per-verb basis.  Each "frameset file" contains one or more
    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
    divided into coarse-grained word senses called "rolesets".  For
    each "roleset", the frameset file provides descriptions of the
    argument roles, along with examples.
    Nc                     t        |t              rt        ||      }t        |      }t        j                  | |||g|z   |       || _        || _        || _        || _	        || _
        y)a  
        :param root: The root directory for this corpus.
        :param propfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by propbank.
        N)
isinstancestrfind_corpus_fileidslistCorpusReader__init__	_propfile_framefiles
_verbsfile_parse_fileid_xform_parse_corpus)selfrootpropfile
framefiles	verbsfileparse_fileid_xformparse_corpusencodings           P/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/propbank.pyr   zPropbankCorpusReader.__init__    sj    2 j#&,T:>J*%
dD8Y*?**LhW "%##5 )    c                      i fdd<   t         j                   j                         fd j                   j                              S )z
        :return: a corpus view that acts as a list of
            ``PropBankInstance`` objects, one for each noun in the corpus.
        c                 "    | j                   k(  S N)baseform)instr#   s    r   <lambda>z0PropbankCorpusReader.instances.<locals>.<lambda>M   s    T]]h5Nr   instance_filterc                 *     j                   | fi S r"   )_read_instance_block)streamkwargsr   s    r   r%   z0PropbankCorpusReader.instances.<locals>.<lambda>P   s    4444VFvFr   r   )StreamBackedCorpusViewabspathr   r   )r   r#   r*   s   ``@r   	instanceszPropbankCorpusReader.instancesF   sK    
 (NF$%%LL(F]]4>>2
 	
r   c                     t        | j                  | j                        t        | j	                  | j                              S )z
        :return: a corpus view that acts as a list of strings, one for
            each line in the predicate-argument annotation file.
        r+   )r,   r-   r   read_line_blockr   r   s    r   lineszPropbankCorpusReader.linesT   s4    
 &LL(]]4>>2
 	
r   c                    |j                  d      d   }d|z  }|| j                  vrt        d|z        | j                  |      j	                         5 }t        j                  |      j                         }ddd       j                  d      D ]  }|j                  d   |k(  s|c S  t        d| d	|       # 1 sw Y   FxY w)
zE
        :return: the xml description for the given roleset.
        .r   frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )
splitr   
ValueErrorr-   openr   parsegetrootfindallattrib)r   
roleset_idr#   	framefilefpetreerolesets          r   rD   zPropbankCorpusReader.roleset_   s     ##C(+#h.	D,,,=
JKK \\)$))+r%%b)113E ,}}%89G~~d#z1 : 8J<~i[IJJ ,+s   $B>>Cc                 ~   |%d|z  }|| j                   vrt        d|z        |g}n| j                   }g }|D ]m  }| j                  |      j                         5 }t	        j
                  |      j                         }ddd       |j                  j                  d             o t        |      S # 1 sw Y   6xY w)zA
        :return: list of xml descriptions for rolesets.
        Nr5   r6   r7   )
r   r:   r-   r;   r   r<   r=   appendr>   LazyConcatenation)r   r#   rA   r   rsetsrB   rC   s          r   rolesetszPropbankCorpusReader.rolesetsq   s     '(2I 0 00 !AH!LMM#J))J#I i(--/2#))"-557 0LL':;< $ !'' 0/s   $B33B<	c                     t        | j                  | j                        t        | j	                  | j                              S )z
        :return: a corpus view that acts as a list of all verb lemmas
            in this corpus (from the verbs.txt file).
        r+   )r,   r-   r   r0   r   r1   s    r   verbszPropbankCorpusReader.verbs   s4    
 &LL)]]4??3
 	
r   c                      y)NT )r$   s    r   r%   zPropbankCorpusReader.<lambda>   s    r   c                     g }t        d      D ]h  }|j                         j                         }|s$t        j	                  || j
                  | j                        } ||      sX|j                  |       j |S )Nd   )rangereadlinestripPropbankInstancer<   r   r   rF   )r   r)   r&   blockiliner$   s          r   r(   z)PropbankCorpusReader._read_instance_block   so     sA??$**,D'--$22D4F4F #4(LL&  r   ) NNNutf8r"   )__name__
__module____qualname____doc__r   r.   r2   rD   rI   rK   r(   rM   r   r   r	   r	      sC    " $*L
	
K$(*	
 <M r   r	   c                   ~    e Zd Z	 ddZed        Zed        Zed        Zd Zd Z	d Z
 ee
d	
      Zedd       Zy)rS   Nc
                     || _         	 || _        	 || _        	 || _        	 || _        	 || _        	 || _        	 t        |      | _        	 |	| _	        y r"   )
fileidsentnumwordnumtaggerrD   
inflection	predicatetuple	argumentsr   )
r   r_   r`   ra   rb   rD   rc   rd   rf   r   s
             r   r   zPropbankInstance.__init__   s     	  	& 	D 	: 	2 %	& #	@ y)	 )	.r   c                 >    | j                   j                  d      d   S )zThe baseform of the predicate.r4   r   rD   r9   r1   s    r   r#   zPropbankInstance.baseform        ||!!#&q))r   c                 >    | j                   j                  d      d   S )z"The sense number of the predicate.r4      rh   r1   s    r   sensenumberzPropbankInstance.sensenumber   ri   r   c                      y)zIdentifier of the predicate.relrM   r1   s    r   predidzPropbankInstance.predid   s     r   c                 d    dj                  | j                  | j                  | j                        S )Nz(<PropbankInstance: {}, sent {}, word {}>)formatr_   r`   ra   r1   s    r   __repr__zPropbankInstance.__repr__   s*    9@@KKLLLL
 	
r   c                    dj                  | j                  | j                  | j                  | j                  | j
                  | j                        }| j                  | j                  dffz   }t        |      D ]  \  }}|d| d| z  } |S )Nz{} {} {} {} {} {}rn    -)
rq   r_   r`   ra   rb   rD   rc   rf   rd   sorted)r   sitemsarglocargids        r   __str__zPropbankInstance.__str__   s    &&KKLLLLKKLLOO
 4>>5"9!;;#E]MFE1VHAeW%%A +r   c                     | j                   y | j                  | j                   j                         vry | j                   j                  | j                        | j                     S r"   )r   r_   fileidsparsed_sentsr`   r1   s    r   	_get_treezPropbankInstance._get_tree   sS    $;;d//7799  --dkk:4<<HHr   zs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc                    | j                         }t        |      dk  rt        d| z        |d d \  }}}}}}	|dd  D 
cg c]  }
|
j                  d      s|
 }}
|dd  D 
cg c]  }
|
j                  d      r|
 }}
t        |      dk7  rt        d| z        | ||      }t	        |      }t	        |      }t
        j                  |	      }	t        j                  |d   d d       }g }|D ]=  }|j                  dd      \  }}|j                  t        j                  |      |f       ? t        ||||||	|||	      S c c}
w c c}
w )	N   z!Badly formatted propbank line: %r   z-relrk   r   ru   )
r9   lenr:   endswithintPropbankInflectionr<   PropbankTreePointerrF   rS   )rw   r   r   piecesr_   r`   ra   rb   rD   rc   prn   argsrd   rf   argry   rz   s                     r   r<   zPropbankInstance.parse  su   v;?@1DEE CI!*?'67J *;*Q

6(:q*;!!":@:aQZZ-?:@s8q=@1DEE )'/F g,g, (--j9
 (--c!fSbk:	 	CIIc1-MFE177?GH 
  

 
	
5 <@s    EE$E	;E	r"   )NN)rY   rZ   r[   r   propertyr#   rl   ro   rr   r{   r   treestaticmethodr<   rM   r   r   rS   rS      s     3.j * * * *  
I 4D +
 +
r   rS   c                       e Zd ZdZd Zy)PropbankPointera  
    A pointer used by propbank to identify one or more constituents in
    a parse tree.  ``PropbankPointer`` is an abstract base class with
    three concrete subclasses:

      - ``PropbankTreePointer`` is used to point to single constituents.
      - ``PropbankSplitTreePointer`` is used to point to 'split'
        constituents, which consist of a sequence of two or more
        ``PropbankTreePointer`` pointers.
      - ``PropbankChainTreePointer`` is used to point to entire trace
        chains in a tree.  It consists of a sequence of pieces, which
        can be ``PropbankTreePointer`` or ``PropbankSplitTreePointer`` pointers.
    c                 >    | j                   t        k(  r
t               y r"   )	__class__r   NotImplementedErrorr1   s    r   r   zPropbankPointer.__init__L  s    >>_,%'' -r   N)rY   rZ   r[   r\   r   rM   r   r   r   r   =  s    (r   r   c                   $    e Zd Zd Zd Zd Zd Zy)PropbankChainTreePointerc                     || _         y r"   r   r   r   s     r   r   z!PropbankChainTreePointer.__init__R  s    	0r   c                 F    dj                  d | j                  D              S )Nr   c              3   &   K   | ]	  }d |z    ywz%sNrM   .0r   s     r   	<genexpr>z3PropbankChainTreePointer.__str__.<locals>.<genexpr>Y       6+Qq+   joinr   r1   s    r   r{   z PropbankChainTreePointer.__str__X      xx6$++666r   c                     d| z  S )Nz<PropbankChainTreePointer: %s>rM   r1   s    r   rr   z!PropbankChainTreePointer.__repr__[      /$66r   c           	          |t        d      t        d| j                  D cg c]  }|j                  |       c}      S c c}w )NParse tree not availablez*CHAIN*r:   r   r   selectr   r   r   s      r   r   zPropbankChainTreePointer.select^  >    <788ID1DEED   A
NrY   rZ   r[   r   r{   rr   r   rM   r   r   r   r   Q  s    077Fr   r   c                   $    e Zd Zd Zd Zd Zd Zy)PropbankSplitTreePointerc                     || _         y r"   r   r   s     r   r   z!PropbankSplitTreePointer.__init__e  s    	4r   c                 F    dj                  d | j                  D              S )N,c              3   &   K   | ]	  }d |z    ywr   rM   r   s     r   r   z3PropbankSplitTreePointer.__str__.<locals>.<genexpr>k  r   r   r   r1   s    r   r{   z PropbankSplitTreePointer.__str__j  r   r   c                     d| z  S )Nz<PropbankSplitTreePointer: %s>rM   r1   s    r   rr   z!PropbankSplitTreePointer.__repr__m  r   r   c           	          |t        d      t        d| j                  D cg c]  }|j                  |       c}      S c c}w )Nr   z*SPLIT*r   r   s      r   r   zPropbankSplitTreePointer.selectp  r   r   Nr   rM   r   r   r   r   d  s    4
77Fr   r   c                   P    e Zd ZdZd Zed        Zd Zd Zd Z	d Z
d Zd	 Zd
 Zy)r   z@
    wordnum:height*wordnum:height*...
    wordnum:height,

    c                      || _         || _        y r"   ra   height)r   ra   r   s      r   r   zPropbankTreePointer.__init__~  s    r   c                    | j                  d      }t        |      dkD  r,t        |D cg c]  }t        j	                  |       c}      S | j                  d      }t        |      dkD  r,t        |D cg c]  }t        j	                  |       c}      S | j                  d      }t        |      dk7  rt        d| z        t        t        |d         t        |d               S c c}w c c}w )Nr   rk   r   :   zbad propbank pointer %rr   )r9   r   r   r   r<   r   r:   r   )rw   r   elts      r   r<   zPropbankTreePointer.parse  s     v;?+;AB6C$**3/6B 
 v;?+;AB6C$**3/6B 
 v;!6:;;"3vay>3vay>BB C Cs   C(4C-c                 8    | j                    d| j                   S )Nr   r   r1   s    r   r{   zPropbankTreePointer.__str__  s    ,,q..r   c                 8    d| j                   | j                  fz  S )NzPropbankTreePointer(%d, %d)r   r1   s    r   rr   zPropbankTreePointer.__repr__  s    ,dkk/JJJr   c                 
   t        |t        t        f      r&|j                  d   }t        |t        t        f      r&t        |t              s| |u S | j
                  |j
                  k(  xr | j                  |j                  k(  S Nr   )r   r   r   r   r   ra   r   r   others     r   __eq__zPropbankTreePointer.__eq__  so    !9;S TULLOE !9;S TU %!455= ||u}},L1LLr   c                     | |k(   S r"   rM   r   s     r   __ne__zPropbankTreePointer.__ne__  s    5=  r   c                 .   t        |t        t        f      r&|j                  d   }t        |t        t        f      r&t        |t              st        |       t        |      k  S | j                  | j                   f|j                  |j                   fk  S r   )r   r   r   r   r   r8   ra   r   r   s     r   __lt__zPropbankTreePointer.__lt__  s|    !9;S TULLOE !9;S TU %!45d8bi''t{{l+u}}u||m.LLLr   c                 D    |t        d      || j                  |         S )Nr   )r:   treepos)r   r   s     r   r   zPropbankTreePointer.select  s'    <788DLL&''r   c                    |t        d      |g}g }d}	 t        |d   t              rt        |      t        |      k  r|j	                  d       n|dxx   dz  cc<   |d   t        |d         k  r|j	                  |d   |d             nl|j                          |j                          nK|| j                  k(  r't        |dt        |      | j                  z
  dz
         S |dz  }|j                          )z}
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        Nr   r   rk   )	r:   r   r   r   rF   popra   re   r   )r   r   stackr   ra   s        r   r   zPropbankTreePointer.treepos  s    
 <788%)T*w<#e*,NN1%BK1$K2;U2Y/LLr72;!78 IIKKKM dll* )I3w<$+++E+I!JKKqLGIIK+ r   N)rY   rZ   r[   r\   r   r   r<   r{   rr   r   r   r   r   r   rM   r   r   r   r   v  sH     C C*/KM!M(
  r   r   c                       e Zd ZdZdZdZdZdZdZdZ	dZ
dZdZd	Zd
ZdZdZddZd Zd Z ej*                  d      Zed        Zy)r   rU   gr   vfnob3aru   c                 J    || _         || _        || _        || _        || _        y r"   formtenseaspectpersonvoice)r   r   r   r   r   r   s         r   r   zPropbankInflection.__init__  s%    	

r   c                     | j                   | j                  z   | j                  z   | j                  z   | j                  z   S r"   r   r1   s    r   r{   zPropbankInflection.__str__  s0    yy4::%3dkkADJJNNr   c                     d| z  S )Nz<PropbankInflection: %s>rM   r1   s    r   rr   zPropbankInflection.__repr__  s    )D00r   z"[igpv\-][fpn\-][pob\-][3\-][ap\-]$c                     t        | t              st        d      t        |       dk7  st        j
                  j                  |       st        d| z        t	        |  S )Nzexpected a string   z!Bad propbank inflection string %r)r   r   	TypeErrorr   r   	_VALIDATEmatchr:   )rw   s    r   r<   zPropbankInflection.parse  sS    !S!/00q6Q;0::@@C@1DEE!1%%r   N)ru   ru   ru   ru   ru   )rY   rZ   r[   
INFINITIVEGERUND
PARTICIPLEFINITEFUTUREPASTPRESENTPERFECTPROGRESSIVEPERFECT_AND_PROGRESSIVETHIRD_PERSONACTIVEPASSIVENONEr   r{   rr   recompiler   r   r<   rM   r   r   r   r     s~    JFJFFDGGK!LFGDO1 

@AI& &r   r   )r   	functoolsr   	xml.etreer   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.internalsr   	nltk.treer   r   r	   rS   r   r   r   r   r   rM   r   r   <module>r      s    
 $ ! $ % 2 L< LhT
 T
n( ((F F&F F$ a / a  a H,& ,&r   