
    g=                         d dl mZ d dlmZ d dl d dl d dlmZ d dlm	Z	  G d de
      Z G d d	      Z G d
 d      Z G d de      Z G d de      Ze G d de             Zy)    )total_ordering)ElementTree)*)raise_unorderable_types)Treec                   P    e Zd ZdZ	 	 	 	 	 ddZddZd Zd ZddZd Z	d	 fd
Z
y)NombankCorpusReadera  
    Corpus reader for the nombank corpus, which augments the Penn
    Treebank with information about the predicate argument structure
    of every noun instance.  The corpus consists of two parts: the
    predicate-argument annotations themselves, and a set of "frameset
    files" which define the argument labels used by the annotations,
    on a per-noun basis.  Each "frameset file" contains one or more
    predicates, such as ``'turn'`` or ``'turn_on'``, each of which is
    divided into coarse-grained word senses called "rolesets".  For
    each "roleset", the frameset file provides descriptions of the
    argument roles, along with examples.
    Nc                     t        |t              rt        ||      | _        t	        |      | _        t
        j                  | |||       || _        || _        || _	        || _
        y)a  
        :param root: The root directory for this corpus.
        :param nomfile: The name of the file containing the predicate-
            argument annotations (relative to ``root``).
        :param framefiles: A list or regexp specifying the frameset
            fileids for this corpus.
        :param parse_fileid_xform: A transform that should be applied
            to the fileids in this corpus.  This should be a function
            of one argument (a fileid) that returns a string (the new
            fileid).
        :param parse_corpus: The corpus containing the parse trees
            corresponding to this corpus.  These parse trees are
            necessary to resolve the tree pointers used by nombank.
        N)
isinstancestrfind_corpus_fileids_fileidslistCorpusReader__init___nomfile
_nounsfile_parse_fileid_xform_parse_corpus)selfrootnomfile
framefiles	nounsfileparse_fileid_xformparse_corpusencodings           O/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/nombank.pyr   zNombankCorpusReader.__init__    s\    4 j#&/jADMZ(dD*h?  ##5 )    c                      i fdd<   t         j                   j                         fd j                   j                              S )z
        :return: a corpus view that acts as a list of
            ``NombankInstance`` objects, one for each noun in the corpus.
        c                 "    | j                   k(  S N)baseform)instr#   s    r   <lambda>z/NombankCorpusReader.instances.<locals>.<lambda>M   s    T]]h5Nr   instance_filterc                 *     j                   | fi S r"   )_read_instance_block)streamkwargsr   s    r   r%   z/NombankCorpusReader.instances.<locals>.<lambda>P   s    4444VFvFr   r   )StreamBackedCorpusViewabspathr   r   )r   r#   r*   s   ``@r   	instanceszNombankCorpusReader.instancesF   sK    
 (NF$%%LL'F]]4==1
 	
r   c                     t        | j                  | j                        t        | j	                  | j                              S )z
        :return: a corpus view that acts as a list of strings, one for
            each line in the predicate-argument annotation file.
        r+   )r,   r-   r   read_line_blockr   r   s    r   lineszNombankCorpusReader.linesT   s4    
 &LL']]4==1
 	
r   c                    |j                  d      d   }|j                  dd      }|j                  dd      j                  dd      }d|z  }|| j                         vrt        d	|z        | j	                  |      j                         5 }t        j                  |      j                         }d
d
d
       j                  d      D ]  }|j                  d   |k(  s|c S  t        d| d|       # 1 sw Y   FxY w)zE
        :return: the xml description for the given roleset.
        .r   	perc-sign%oneslashonezero1/10
1-slash-10frames/%s.xmlFrameset file for %s not foundNpredicate/rolesetidzRoleset z not found in )splitreplacefileids
ValueErrorr-   openr   parsegetrootfindallattrib)r   
roleset_idr#   	framefilefpetreerolesets          r   rK   zNombankCorpusReader.roleset_   s     ##C(+##K5##$5v>FFL
 $h.	DLLN*=
JKK \\)$))+r%%b)113E ,}}%89G~~d#z1 : 8J<~i[IJJ ,+s   $C66C?c                    |)d|z  }|| j                         vrt        d|z        |g}n| j                         }g }|D ]m  }| j                  |      j                         5 }t	        j
                  |      j                         }ddd       |j                  j                  d             o t        |      S # 1 sw Y   6xY w)zA
        :return: list of xml descriptions for rolesets.
        Nr:   r;   r<   )
r@   rA   r-   rB   r   rC   rD   appendrE   LazyConcatenation)r   r#   rH   r   rsetsrI   rJ   s          r   rolesetszNombankCorpusReader.rolesetsu   s     '(2I. !AH!LMM#JJ#I i(--/2#))"-557 0LL':;< $ !'' 0/s   "$B;;C	c                     t        | j                  | j                        t        | j	                  | j                              S )z
        :return: a corpus view that acts as a list of all noun lemmas
            in this corpus (from the nombank.1.0.words file).
        r+   )r,   r-   r   r0   r   r1   s    r   nounszNombankCorpusReader.nouns   s4    
 &LL)]]4??3
 	
r   c                      y)NT )r$   s    r   r%   zNombankCorpusReader.<lambda>   s    r   c                     g }t        d      D ]h  }|j                         j                         }|s$t        j	                  || j
                  | j                        } ||      sX|j                  |       j |S )Nd   )rangereadlinestripNombankInstancerC   r   r   rM   )r   r)   r&   blockiliner$   s          r   r(   z(NombankCorpusReader._read_instance_block   so     sA??$**,D&,,$22D4F4F #4(LL&  r   ) NNNutf8r"   )__name__
__module____qualname____doc__r   r.   r2   rK   rP   rR   r(   rT   r   r   r	   r	      sC    " $*L
	
K,(*	
 <M r   r	   c                   ^    e Zd Z	 d
dZed        Zd Zd Zd Z eed      Z	e
dd	       Zy)rZ   Nc
                     || _         	 || _        	 || _        	 || _        	 || _        	 || _        	 || _        	 t        |      | _        	 |	| _	        y r"   )
fileidsentnumwordnumr#   sensenumber	predicatepredidtuple	argumentsr   )
r   rf   rg   rh   r#   ri   rj   rk   rm   r   s
             r   r   zNombankInstance.__init__   s}     	  	& 	D !,&0"	@ *y)	 )	-r   c                     | j                   j                  dd      }|j                  dd      j                  dd      }| d| j                   S )zThe name of the roleset used by this instance's predicate.
        Use ``nombank.roleset() <NombankCorpusReader.roleset>`` to
        look up information about the roleset.r6   r5   r8   r9   r7   r4   )r#   r?   ri   )r   rs     r   rK   zNombankInstance.roleset   sP    
 MM!!#{3IIfl+33LBSTAd&&'((r   c                 d    dj                  | j                  | j                  | j                        S )Nz'<NombankInstance: {}, sent {}, word {}>)formatrf   rg   rh   r1   s    r   __repr__zNombankInstance.__repr__   s*    8??KKLLLL
 	
r   c                    dj                  | j                  | j                  | j                  | j                  | j
                        }| j                  | j                  dffz   }t        |      D ]  \  }}|d| d| z  } |S )Nz{} {} {} {} {}rel -)	rq   rf   rg   rh   r#   ri   rm   rj   sorted)r   sitemsarglocargids        r   __str__zNombankInstance.__str__   s    ##KKLLLLMM
 4>>5"9!;;#E]MFE1VHAeW%%A +r   c                     | j                   y | j                  | j                   j                         vry | j                   j                  | j                        | j                     S r"   )r   rf   r@   parsed_sentsrg   r1   s    r   	_get_treezNombankInstance._get_tree   sS    $;;d//7799  --dkk:4<<HHr   zs
        The parse tree corresponding to this instance, or None if
        the corresponding tree is not available.)docc                 h   | j                         }t        |      dk  rt        d| z        |d d \  }}}}}|dd  }	t        |	      D 
cg c]  \  }
}d|v s|	j	                  |
       }}
}t        |      dk7  rt        d| z        | ||      }t        |      }t        |      }|d   j                  dd      \  }}t        j                  |      }g }|	D ]=  }|j                  dd      \  }}|j                  t        j                  |      |f       ? t        |||||||||	      S c c}}
w )N   z Badly formatted nombank line: %r   z-rel   r   rv   )
r>   lenrA   	enumeratepopintNombankTreePointerrC   rM   rZ   )rx   r   r   piecesrf   rg   rh   r#   ri   argsr\   prt   predlocrk   rj   rm   argrz   r{   s                       r   rC   zNombankInstance.parse  sW   v;??!CDD =C2AJ9'8[abz'0Ftq!&A+txx{Fs8q=?!CDD )'/F g,g, a&,,sA.&,,W5	 	CIIc1-MFE066v>FG 
 

 
	
1 Gs   D.D.r"   )NN)r`   ra   rb   r   propertyrK   rr   r|   r   treestaticmethodrC   rT   r   r   rZ   rZ      sY     /-b ) )
I 4D +
 +
r   rZ   c                       e Zd ZdZd Zy)NombankPointeran  
    A pointer used by nombank to identify one or more constituents in
    a parse tree.  ``NombankPointer`` is an abstract base class with
    three concrete subclasses:

    - ``NombankTreePointer`` is used to point to single constituents.
    - ``NombankSplitTreePointer`` is used to point to 'split'
      constituents, which consist of a sequence of two or more
      ``NombankTreePointer`` pointers.
    - ``NombankChainTreePointer`` is used to point to entire trace
      chains in a tree.  It consists of a sequence of pieces, which
      can be ``NombankTreePointer`` or ``NombankSplitTreePointer`` pointers.
    c                 >    | j                   t        k(  r
t               y r"   )	__class__r   NotImplementedErrorr1   s    r   r   zNombankPointer.__init__E  s    >>^+%'' ,r   N)r`   ra   rb   rc   r   rT   r   r   r   r   6  s    (r   r   c                   $    e Zd Zd Zd Zd Zd Zy)NombankChainTreePointerc                     || _         y r"   r   r   r   s     r   r   z NombankChainTreePointer.__init__K  s    	/r   c                 F    dj                  d | j                  D              S )Nr   c              3   &   K   | ]	  }d |z    ywz%sNrT   .0r   s     r   	<genexpr>z2NombankChainTreePointer.__str__.<locals>.<genexpr>R       6+Qq+   joinr   r1   s    r   r|   zNombankChainTreePointer.__str__Q      xx6$++666r   c                     d| z  S )Nz<NombankChainTreePointer: %s>rT   r1   s    r   rr   z NombankChainTreePointer.__repr__T      .55r   c           	          |t        d      t        d| j                  D cg c]  }|j                  |       c}      S c c}w )NParse tree not availablez*CHAIN*rA   r   r   selectr   r   r   s      r   r   zNombankChainTreePointer.selectW  >    <788ID1DEED   A
Nr`   ra   rb   r   r|   rr   r   rT   r   r   r   r   J  s    /76Fr   r   c                   $    e Zd Zd Zd Zd Zd Zy)NombankSplitTreePointerc                     || _         y r"   r   r   s     r   r   z NombankSplitTreePointer.__init__^  s    	3r   c                 F    dj                  d | j                  D              S )N,c              3   &   K   | ]	  }d |z    ywr   rT   r   s     r   r   z2NombankSplitTreePointer.__str__.<locals>.<genexpr>d  r   r   r   r1   s    r   r|   zNombankSplitTreePointer.__str__c  r   r   c                     d| z  S )Nz<NombankSplitTreePointer: %s>rT   r1   s    r   rr   z NombankSplitTreePointer.__repr__f  r   r   c           	          |t        d      t        d| j                  D cg c]  }|j                  |       c}      S c c}w )Nr   z*SPLIT*r   r   s      r   r   zNombankSplitTreePointer.selecti  r   r   Nr   rT   r   r   r   r   ]  s    3
76Fr   r   c                   P    e Zd ZdZd Zed        Zd Zd Zd Z	d Z
d Zd	 Zd
 Zy)r   z@
    wordnum:height*wordnum:height*...
    wordnum:height,

    c                      || _         || _        y r"   rh   height)r   rh   r   s      r   r   zNombankTreePointer.__init__w  s    r   c                    | j                  d      }t        |      dkD  r,t        |D cg c]  }t        j	                  |       c}      S | j                  d      }t        |      dkD  r,t        |D cg c]  }t        j	                  |       c}      S | j                  d      }t        |      dk7  rt        d| z        t        t        |d         t        |d               S c c}w c c}w )Nr   r   r   :   zbad nombank pointer %rr   )r>   r   r   r   rC   r   rA   r   )rx   r   elts      r   rC   zNombankTreePointer.parse{  s     v;?*:@A&3#))#.&A 
 v;?*:@A&3#))#.&A 
 v;!59::!#fQi.#fQi.AA B Bs   C(4C-c                 8    | j                    d| j                   S )Nr   r   r1   s    r   r|   zNombankTreePointer.__str__  s    ,,q..r   c                 8    d| j                   | j                  fz  S )NzNombankTreePointer(%d, %d)r   r1   s    r   rr   zNombankTreePointer.__repr__  s    +t||T[[.IIIr   c                 
   t        |t        t        f      r&|j                  d   }t        |t        t        f      r&t        |t              s| |u S | j
                  |j
                  k(  xr | j                  |j                  k(  S Nr   )r   r   r   r   r   rh   r   r   others     r   __eq__zNombankTreePointer.__eq__  so    !8:Q RSLLOE !8:Q RS %!345= ||u}},L1LLr   c                     | |k(   S r"   rT   r   s     r   __ne__zNombankTreePointer.__ne__  s    5=  r   c                 .   t        |t        t        f      r&|j                  d   }t        |t        t        f      r&t        |t              st        |       t        |      k  S | j                  | j                   f|j                  |j                   fk  S r   )r   r   r   r   r   r=   rh   r   r   s     r   __lt__zNombankTreePointer.__lt__  s|    !8:Q RSLLOE !8:Q RS %!34d8bi''t{{l+u}}u||m.LLLr   c                 D    |t        d      || j                  |         S )Nr   )rA   treepos)r   r   s     r   r   zNombankTreePointer.select  s'    <788DLL&''r   c                    |t        d      |g}g }d}	 t        |d   t              rt        |      t        |      k  r|j	                  d       n|dxx   dz  cc<   |d   t        |d         k  r|j	                  |d   |d             nl|j                          |j                          nK|| j                  k(  r't        |dt        |      | j                  z
  dz
         S |dz  }|j                          )z}
        Convert this pointer to a standard 'tree position' pointer,
        given that it points to the given tree.
        Nr   r   r   )	rA   r   r   r   rM   r   rh   rl   r   )r   r   stackr   rh   s        r   r   zNombankTreePointer.treepos  s    
 <788%)T*w<#e*,NN1%BK1$K2;U2Y/LLr72;!78 IIKKKM dll* )I3w<$+++E+I!JKKqLGIIK+ r   N)r`   ra   rb   rc   r   r   rC   r|   rr   r   r   r   r   r   rT   r   r   r   r   o  sH     B B*/JM!M(
  r   r   N)	functoolsr   	xml.etreer   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.internalsr   	nltk.treer   r   r	   rZ   r   r   r   r   rT   r   r   <module>r      sy    % ! $ % 2 P, PpI
 I
X( ((Fn F&Fn F$ a  a  a r   