
    g6E                     n   d Z ddlmZ ddlmZmZmZmZmZm	Z	m
Z
mZmZmZmZmZmZ ddlmZmZmZmZmZmZmZmZ  G d de      Z G d dee      Z G d	 d
e      Z G d de      Z G d de      Z G d de      Z G d de      Z  G d de      Z! G d de      Z" G d de      Z# G d de      Z$ e        e        e        e        e       gZ% e        e        e        e       gZ& e        e
        e        e       gZ' e        e
        e        e       gZ( e        e        e        gZ) G d de      Z* G d de*      Z+ G d d e*      Z, G d! d"e*      Z- G d# d$e*      Z. G d% d&e*      Z/ e        e        e"        e#        e$       gZ0 e        e        e        e!       gZ1 e        e        e        e!       gZ2 e        e        e        e!       gZ3 G d' d(e*e      Z4 G d) d*e4      Z5 G d+ d,e4      Z6 G d- d.e4      Z7 G d/ d0e4      Z8	 	 	 	 	 	 d4d1Z9e:d2k(  r e9        y3y3)5a  
Data classes and parser implementations for *incremental* chart
parsers, which use dynamic programming to efficiently parse a text.
A "chart parser" derives parse trees for a text by iteratively adding
"edges" to a "chart".  Each "edge" represents a hypothesis about the tree
structure for a subsequence of the text.  The "chart" is a
"blackboard" for composing and combining these hypotheses.

A parser is "incremental", if it guarantees that for all i, j where i < j,
all edges ending at i are built before any edges ending at j.
This is appealing for, say, speech recognizer hypothesis filtering.

The main parser class is ``EarleyChartParser``, which is a top-down
algorithm, originally formulated by Jay Earley (1970).
    )perf_counter)BottomUpPredictCombineRuleBottomUpPredictRuleCachedTopDownPredictRuleChartChartParserEdgeIEmptyPredictRule"FilteredBottomUpPredictCombineRule!FilteredSingleEdgeFundamentalRuleLeafEdgeLeafInitRuleSingleEdgeFundamentalRuleTopDownInitRule)!FeatureBottomUpPredictCombineRuleFeatureBottomUpPredictRuleFeatureChartFeatureChartParserFeatureEmptyPredictRule FeatureSingleEdgeFundamentalRuleFeatureTopDownInitRuleFeatureTopDownPredictRulec                   <    e Zd Zd Zd Zd Zd Zd Zd Zd Z	d Z
y	)
IncrementalChartc                 j    t        d | j                         D              | _        i | _        i | _        y )Nc              3       K   | ]  }g   y wN .0xs     K/var/www/openai/venv/lib/python3.12/site-packages/nltk/parse/earleychart.py	<genexpr>z.IncrementalChart.initialize.<locals>.<genexpr>@   s     >,=q,=   )tuple
_positions
_edgelists_edge_to_cpls_indexesselfs    r"   
initializezIncrementalChart.initialize>   s.    >DOO,=>>       c                 4    t        | j                               S r   )list	iteredgesr*   s    r"   edgeszIncrementalChart.edgesI   s    DNN$%%r-   c                 (    d | j                   D        S )Nc              3   .   K   | ]  }|D ]  }|   y wr   r   )r    edgelistedges      r"   r#   z-IncrementalChart.iteredges.<locals>.<genexpr>M   s     J__s   )r'   r*   s    r"   r0   zIncrementalChart.iteredgesL   s    JT__JJr-   c                 F   | j                   |   }i k(  rt        |      S t        j                               }t	        |      }|| j
                  vr| j                  |       t	        fd|D              }t        | j
                  |   |   j                  |g             S )Nc              3   (   K   | ]	  }|     y wr   r   )r    keyrestrictionss     r"   r#   z*IncrementalChart.select.<locals>.<genexpr>^   s     =*3\#&*s   r'   itersortedkeysr%   r)   
_add_indexgetr+   endr9   r4   
restr_keysvalss     `   r"   selectzIncrementalChart.selectO   s    ??3' 2>! L--/0
:&
 T]]*OOJ'=*==DMM*-c266tR@AAr-   c                 j   |D ]   }t        t        |      rt        d|z         t        d | j	                         D              x}| j
                  |<   t        | j                        D ]F  \  }}||   }|D ]7  t        fd|D              }|j                  |g       j                         9 H y )NBad restriction: %sc              3       K   | ]  }i   y wr   r   r   s     r"   r#   z.IncrementalChart._add_index.<locals>.<genexpr>h        1P>O">Or$   c              3   @   K   | ]  } t        |               y wr   getattrr    r8   r5   s     r"   r#   z.IncrementalChart._add_index.<locals>.<genexpr>n   s     HZc/WT3/1Z   
hasattrr	   
ValueErrorr%   r&   r)   	enumerater'   
setdefaultappend	r+   rB   r8   indexrA   r4   
this_indexrC   r5   s	           @r"   r>   zIncrementalChart._add_indexa   s    C5#& !6!<== 
 -21Pdoo>O1P,PPj) 't7MCsJ HZHH%%dB/66t< ! 8r-   c                     j                         }| j                  j                         D ]=  \  }}t        fd|D              }||   j	                  |g       j                         ? y )Nc              3   @   K   | ]  } t        |               y wr   rJ   rL   s     r"   r#   z:IncrementalChart._register_with_indexes.<locals>.<genexpr>t   s     D#+s+-rM   rA   r)   itemsr%   rR   rS   r+   r5   rA   rB   rU   rC   s    `    r"   _register_with_indexesz'IncrementalChart._register_with_indexesq   sY    hhj!%!4!4!6JDDDD#J!!$+2248 "7r-   c                 \    | j                   |j                            j                  |       y r   )r'   rA   rS   )r+   r5   s     r"   _append_edgezIncrementalChart._append_edgew   s    
#**40r-   c                 :    t        | j                         dz         S )N   )range
num_leavesr*   s    r"   r&   zIncrementalChart._positionsz   s    T__&*++r-   N)__name__
__module____qualname__r,   r1   r0   rD   r>   r\   r^   r&   r   r-   r"   r   r   =   s,    	&KB$= 91,r-   r   c                       e Zd Zd Zd Zd Zy)FeatureIncrementalChartc                 J     j                   |   }i k(  rt        |      S t        j                               }t	        |      }| j
                  vr j                  |       t	         fd|D              }t         j
                  |   |   j                  |g             S )Nc              3   F   K   | ]  }j                  |           y wr   )_get_type_if_possible)r    r8   r9   r+   s     r"   r#   z1FeatureIncrementalChart.select.<locals>.<genexpr>   s%      
EOcD&&|C'89Zs   !r:   r@   s   ` `   r"   rD   zFeatureIncrementalChart.select   s    ??3' 2>! L--/0
:&
 T]]*OOJ' 
EO
 
 DMM*-c266tR@AAr-   c                 n    |D ]   }t        t        |      rt        d|z         t        d  j	                         D              x} j
                  |<   t         j                        D ]G  \  }}||   }|D ]8  t         fd|D              }|j                  |g       j                         : I y )NrF   c              3       K   | ]  }i   y wr   r   r   s     r"   r#   z5FeatureIncrementalChart._add_index.<locals>.<genexpr>   rH   r$   c              3   ^   K   | ]$  }j                   t        |                    & y wr   rj   rK   r    r8   r5   r+   s     r"   r#   z5FeatureIncrementalChart._add_index.<locals>.<genexpr>   s1      ) ../AwtS/A/CD)   *-rN   rT   s	   `       @r"   r>   z"FeatureIncrementalChart._add_index   s    C5#& !6!<== 
 -21Pdoo>O1P,PPj) 't7MCsJ  )  %%dB/66t< ! 8r-   c                      j                         } j                  j                         D ]>  \  }}t         fd|D              }||   j	                  |g       j                         @ y )Nc              3   ^   K   | ]$  }j                   t        |                    & y wr   rn   ro   s     r"   r#   zAFeatureIncrementalChart._register_with_indexes.<locals>.<genexpr>   s-      LVS**+=74+=+?@Jrp   rY   r[   s   ``    r"   r\   z.FeatureIncrementalChart._register_with_indexes   sb    hhj!%!4!4!6J LV D #J!!$+2248	 "7r-   N)rc   rd   re   rD   r>   r\   r   r-   r"   rg   rg   ~   s    B(=&9r-   rg   c                       e Zd Zd Zy)CompleteFundamentalRulec              #      K   |j                         }|j                  ||d|j                               D ]9  }|j                  |j                               }|j	                  |||      s6| ; y wNT)startrA   is_completelhs)rA   rD   nextsymmove_dot_forwardinsert_with_backpointer)r+   chartgrammar	left_edgerA   
right_edgenew_edges          r"   _apply_incompletez)CompleteFundamentalRule._apply_incomplete   sm     mmo  ,,3Di6G6G6I ' 
J !11*..2BCH,,Xy*M
s   A)A3,A3Nrc   rd   re   r   r   r-   r"   rt   rt      s    	r-   rt   c                        e Zd Z e       Zd Zy)CompleterRulec              #   |   K   t        |t              s&| j                  j                  |||      E d {    y y 7 wr   
isinstancer   _fundamental_ruleapplyr+   r}   r~   r5   s       r"   r   zCompleterRule.apply   s6     $)--33E7DIII *I   1<:<Nrc   rd   re   rt   r   r   r   r-   r"   r   r          /1Jr-   r   c                        e Zd Z e       Zd Zy)ScannerRulec              #   |   K   t        |t              r&| j                  j                  |||      E d {    y y 7 wr   r   r   s       r"   r   zScannerRule.apply   s6     dH%--33E7DIII &Ir   Nr   r   r-   r"   r   r      r   r-   r   c                       e Zd Zy)PredictorRuleNrc   rd   re   r   r-   r"   r   r          r-   r   c                       e Zd Zd Zy)FilteredCompleteFundamentalRulec              #   h   K   |j                         r| j                  |||      E d {    y y 7 wr   )rx   _apply_completer   s       r"   r   z%FilteredCompleteFundamentalRule.apply   s4      ++E7DAAA As   '202N)rc   rd   re   r   r   r-   r"   r   r      s    Br-   r   c                       e Zd Zd Zy)FeatureCompleteFundamentalRulec              #      K   | j                   }|j                         }|j                  ||d|j                               D ]  }|j	                  ||||      E d {      y 7 wrv   )r   rA   rD   rz   r   )r+   r}   r~   r   frrA   r   s          r"   r   z0FeatureCompleteFundamentalRule._apply_incomplete   sh     ##mmo  ,,3Di6G6G6I ' 
J xxw	:FFF
 Gs   AA&A$A&Nr   r   r-   r"   r   r      s    Gr-   r   c                       e Zd Z e       Zy)FeatureCompleterRuleNrc   rd   re   r   r   r   r-   r"   r   r          68r-   r   c                       e Zd Z e       Zy)FeatureScannerRuleNr   r   r-   r"   r   r      r   r-   r   c                       e Zd Zy)FeaturePredictorRuleNr   r   r-   r"   r   r      r   r-   r   c                   (    e Zd ZdZeddefdZddZy)IncrementalChartParsera  
    An *incremental* chart parser implementing Jay Earley's
    parsing algorithm:

    | For each index end in [0, 1, ..., N]:
    |   For each edge such that edge.end = end:
    |     If edge is incomplete and edge.next is not a part of speech:
    |       Apply PredictorRule to edge
    |     If edge is incomplete and edge.next is a part of speech:
    |       Apply ScannerRule to edge
    |     If edge is complete:
    |       Apply CompleterRule to edge
    | Return any complete parses in the chart
    r   2   c                 &   || _         || _        || _        || _        g | _        g | _        |D ]b  }|j                  dk(  r| j                  j                  |       .|j                  dk(  r| j
                  j                  |       Yt        d       y)a  
        Create a new Earley chart parser, that uses ``grammar`` to
        parse texts.

        :type grammar: CFG
        :param grammar: The grammar used to parse texts.
        :type trace: int
        :param trace: The level of tracing that should be used when
            parsing a text.  ``0`` will generate no tracing output;
            and higher numbers will produce more verbose tracing
            output.
        :type trace_chart_width: int
        :param trace_chart_width: The default total width reserved for
            the chart in trace output.  The remainder of each line will
            be used to display edges.
        :param chart_class: The class that should be used to create
            the charts used by this parser.
        r   r`   z9Incremental inference rules must have NUM_EDGES == 0 or 1N)	_grammar_trace_trace_chart_width_chart_class_axioms_inference_rules	NUM_EDGESrS   rP   )r+   r~   strategytracetrace_chart_widthchart_classrules          r"   __init__zIncrementalChartParser.__init__/  s    4  "3' "D~~"##D)1$%%,,T2 R  r-   Nc           	      >   || j                   }| j                  }t        |      }| j                  j	                  |       | j                  |      }| j                  }| j                  |j                         dz   z  }|rt        |j                  |             | j                  D ])  }t        |j                  ||            } ||||||       + | j                  }	t        |j                         dz         D ]  }
|dkD  rt        d|
d       t        |j                  |
            }|s3|j                         }|	D ]V  }t        |j                  |||            } ||||||       |D ]'  }|j!                         |
k(  s|j#                  |       ) X |rn |S )Nr`   z
* Processing queue:
)rA   )r   _trace_new_edgesr/   r   check_coverager   r   rb   printpretty_format_leavesr   r   r   ra   rD   poprA   rS   )r+   tokensr   trace_new_edgesr}   r~   trace_edge_widthaxiom	new_edgesinference_rulesrA   agendar5   r   r   s                  r"   chart_parsez"IncrementalChartParser.chart_parseZ  s~   =KKE//f$$V,!!&)--  22u7G7G7IA7MN%,,-=>?\\EU[[89IE5)U<LM " //))+a/0Cqy-sD9%,,3,/0Fzz|+D $TZZw%E FI#E4ECST$-#<<>S0"MM(3 %. , 	 1 r-   r   )rc   rd   re   __doc__BU_LC_INCREMENTAL_STRATEGYr   r   r   r   r-   r"   r   r     s     $ ,$)V!r-   r   c                       e Zd Zd Zy)EarleyChartParserc                 <    t        j                  | |t        fi | y r   )r   r   EARLEY_STRATEGYr+   r~   parser_argss      r"   r   zEarleyChartParser.__init__  s    ''gV+Vr-   Nrc   rd   re   r   r   r-   r"   r   r   ~  s    Wr-   r   c                       e Zd Zd Zy)IncrementalTopDownChartParserc                 <    t        j                  | |t        fi | y r   )r   r   TD_INCREMENTAL_STRATEGYr   s      r"   r   z&IncrementalTopDownChartParser.__init__       '''2	
6A	
r-   Nr   r   r-   r"   r   r         
r-   r   c                       e Zd Zd Zy)IncrementalBottomUpChartParserc                 <    t        j                  | |t        fi | y r   )r   r   BU_INCREMENTAL_STRATEGYr   s      r"   r   z'IncrementalBottomUpChartParser.__init__  r   r-   Nr   r   r-   r"   r   r     r   r-   r   c                       e Zd Zd Zy)(IncrementalBottomUpLeftCornerChartParserc                 <    t        j                  | |t        fi | y r   )r   r   r   r   s      r"   r   z1IncrementalBottomUpLeftCornerChartParser.__init__  s     '''5	
9D	
r-   Nr   r   r-   r"   r   r     r   r-   r   c                       e Zd Zd Zy) IncrementalLeftCornerChartParserc                 r    |j                         st        d      t        j                  | |t        fi | y )NzNIncrementalLeftCornerParser only works for grammars without empty productions.)is_nonemptyrP   r   r   LC_INCREMENTAL_STRATEGYr   s      r"   r   z)IncrementalLeftCornerChartParser.__init__  s@    ""$-  	'''2	
6A	
r-   Nr   r   r-   r"   r   r     s    
r-   r   c                       e Zd ZedefdZy)FeatureIncrementalChartParser   c                 :    t        j                  | |f|||d| y )N)r   r   r   )r   r   )r+   r~   r   r   r   r   s         r"   r   z&FeatureIncrementalChartParser.__init__  s2     	''	
 /#	
 	
r-   N)rc   rd   re   "BU_LC_INCREMENTAL_FEATURE_STRATEGYrg   r   r   r-   r"   r   r     s     4+
r-   r   c                       e Zd Zd Zy)FeatureEarleyChartParserc                 <    t        j                  | |t        fi | y r   )r   r   EARLEY_FEATURE_STRATEGYr   s      r"   r   z!FeatureEarleyChartParser.__init__  s     %..'2	
6A	
r-   Nr   r   r-   r"   r   r     r   r-   r   c                       e Zd Zd Zy)$FeatureIncrementalTopDownChartParserc                 <    t        j                  | |t        fi | y r   )r   r   TD_INCREMENTAL_FEATURE_STRATEGYr   s      r"   r   z-FeatureIncrementalTopDownChartParser.__init__       %..':	
>I	
r-   Nr   r   r-   r"   r   r     r   r-   r   c                       e Zd Zd Zy)%FeatureIncrementalBottomUpChartParserc                 <    t        j                  | |t        fi | y r   )r   r   BU_INCREMENTAL_FEATURE_STRATEGYr   s      r"   r   z.FeatureIncrementalBottomUpChartParser.__init__  r   r-   Nr   r   r-   r"   r   r     r   r-   r   c                       e Zd Zd Zy)/FeatureIncrementalBottomUpLeftCornerChartParserc                 <    t        j                  | |t        fi | y r   )r   r   r   r   s      r"   r   z8FeatureIncrementalBottomUpLeftCornerChartParser.__init__  s     %..'=	
AL	
r-   Nr   r   r-   r"   r   r     r   r-   r   c                 .   ddl }ddl}ddlm}  |       }	|rt	        d       t	        |	       t	        d       t	        |       |j                         }
t	        |
       t	                t        |	|      }t               }|j                  |
      }t        |j                  |	j                                     }t               |z
  }|rt        |      |k(  sJ d       |r|D ]  }t	        |        nt	        dt        |             | rt	        d	|       yy)
z0
    A demonstration of the Earley parsers.
    r   N)demo_grammarz	* Grammarz* Sentence:)r   zNot all parses foundz	Nr trees:zTime:)systimenltk.parse.chartr   r   splitr   r   r   r/   parsesrw   len)print_timesprint_grammarprint_treesr   sent	numparsesr   r   r   r~   r   earleytr}   r   trees                   r"   demor    s     - nGkg 
-	$KZZ\F	&M	G we4FAv&E%,,w}}/0FA 6{i'?)??'D$K  	k3v;'gq r-   __main__N)TFT   z$I saw John with a dog with my cookie   );r   r   r   r   r   r   r   r   r   r	   r
   r   r   r   r   r   r   nltk.parse.featurechartr   r   r   r   r   r   r   r   r   rg   rt   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r  rc   r   r-   r"   <module>r     s       	 	 	 >,u >,B.9. .9l
7 
J+ JJ) J	, 	B&G B	G%E 	G9= 99 9	4 	 NOMO N	  N	  N 	  N&(#% \[ \~W. W

$: 

%; 

/E 
	
'= 	
" N  N"$	#  N "$	#  N%'"$	& "
$:<N 
&
< 

+H 

,I 

6S 
 
	/-` zF r-   