
    g                         d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZmZ ddlmZ ddlmZmZ ddlmZ ddlmZmZ dd	lmZ dd
lm Z  ddl!m"Z"  G d d      Z#d Z$e%dk(  r e$        dgZ&y)zl
A graphical tool for exploring the regular expression based chunk
parser ``nltk.chunk.RegexpChunkParser``.
    N)
ButtonCanvasCheckbuttonFrameIntVarLabelMenu	ScrollbarTextTk)askopenfilenameasksaveasfilename)Font)
ChunkScoreRegexpChunkParser)RegexpChunkRule)	conll2000treebank_chunk)ShowText)Tree)in_idlec                       e Zd ZdZi dddddddd	d
dddddddddddddddddddddd d!d"d#i d$d%d&d'd(d)d*d+d,d-d.d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdPdQZg dRZdS edTU      fdV edWU      fdX edYZ      fd[ ed\]      fd^ ed\]      fd_ ed`d`a      fdb edcdda      fde edfU      fdg edhU      fdi edjU      fg
ZdkZ	 dlZ		 dmZ
	 dnZ	 doZ	  edpdqdrdrdkdsdtduv      Z edldldrdrdwdkdsdtdux	      Z edydzd{d{dkdsdtdud|}	      Z ed~dsdt      Z edd      Z eddtdtd      Z ed{d{dkdsdtdd      Z eddd      ZdZdrZ edrZ      Z edZ      ZdZd Z	 	 	 	 	 ddZd Zd Zd Zd ZdZ dZ!d Z"dZ#d Z$d Z%d Z&dZ'd Z(d Z)d Z*d Z+d Z,d Z-d Z.d Z/d Z0ddZ1d Z2d Z3d Z4d Z5ddZ6d Z7d Z8d Z9dZ:ddZ;ddZ<ddZ=d Z>ddZ?ddZ@d ZAy)RegexpChunkAppz
    A graphical tool for exploring the regular expression based chunk
    parser ``nltk.chunk.RegexpChunkParser``.

    See ``HELP`` for instructional text.
    CCzCoordinating conjunctionzPRP$zPossessive pronounCDzCardinal numberRBAdverbDT
DeterminerRBRzAdverb, comparativeEXzExistential thereRBSzAdverb, superlativeFWzForeign wordRPParticleJJ	AdjectiveTOtoJJRzAdjective, comparativeUHInterjectionJJSzAdjective, superlativeVBzVerb, base formLSzList item markerVBDzVerb, past tenseMDModalNNSzNoun, pluralNNzNoun, singular or massVBNzVerb, past participleVBZzVerb,3rd ps. sing. presentNNPzProper noun, singularNNPSzProper noun pluralWDTzwh-determinerPDTPredeterminerWPz
wh-pronounPOSzPossessive endingzWP$zPossessive wh-pronounPRPzPersonal pronounWRBz	wh-adverb(zopen parenthesis)zclose parenthesisz
open quotecommazclose quoteperiodzpound sign (currency marker)zdollar sign (currency marker)zPreposition/subord. conjunctionz#Symbol (mathematical or scientific)zVerb, gerund/present participlezVerb, non-3rd ps. sing. presentcolon)z``,z''.#$INSYMVBGVBP:))Help20a-  Welcome to the regular expression chunk-parser grammar editor.  You can use this editor to develop and test chunk parser grammars based on NLTK's RegexpChunkParser class.

Use this box ('Help') to learn more about the editor; click on the tabs for help on specific topics:<indent>
Rules: grammar rule types
Regexps: regular expression syntax
Tags: part of speech tags
</indent>
Use the upper-left box ('Grammar') to edit your grammar.  Each line of your grammar specifies a single 'rule', which performs an action such as creating a chunk or merging two chunks.

The lower-left box ('Development Set') runs your grammar on the development set, and displays the results.  Your grammar's chunks are <highlight>highlighted</highlight>, and the correct (gold standard) chunks are <underline>underlined</underline>.  If they match, they are displayed in <green>green</green>; otherwise, they are displayed in <red>red</red>.  The box displays a single sentence from the development set at a time; use the scrollbar or the next/previous buttons view additional sentences.

The lower-right box ('Evaluation') tracks the performance of your grammar on the development set.  The 'precision' axis indicates how many of your grammar's chunks are correct; and the 'recall' axis indicates how many of the gold standard chunks your system generated.  Typically, you should try to design a grammar that scores high on both metrics.  The exact precision and recall of the current grammar, as well as their harmonic mean (the 'f-score'), are displayed in the status bar at the bottom of the window.)Rules10a  <h1>{...regexp...}</h1><indent>
Chunk rule: creates new chunks from words matching regexp.</indent>

<h1>}...regexp...{</h1><indent>
Strip rule: removes words matching regexp from existing chunks.</indent>

<h1>...regexp1...}{...regexp2...</h1><indent>
Split rule: splits chunks that match regexp1 followed by regexp2 in two.</indent>

<h1>...regexp...{}...regexp...</h1><indent>
Merge rule: joins consecutive chunks that match regexp1 and regexp2</indent>
)Regexps10 60aZ  <h1>Pattern		Matches...</h1>
<hangindent>	<<var>T</var>>	a word with tag <var>T</var> (where <var>T</var> may be a regexp).
	<var>x</var>?	an optional <var>x</var>
	<var>x</var>+	a sequence of 1 or more <var>x</var>'s
	<var>x</var>*	a sequence of 0 or more <var>x</var>'s
	<var>x</var>|<var>y</var>	<var>x</var> or <var>y</var>
	.	matches any character
	(<var>x</var>)	Treats <var>x</var> as a group
	# <var>x...</var>	Treats <var>x...</var> (to the end of the line) as a comment
	\<var>C</var>	matches character <var>C</var> (useful when <var>C</var> is a special character like + or #)
</hangindent>
<h1>Examples:</h1>
<hangindent>	<regexp><NN></regexp>
		Matches <match>"cow/NN"</match>
		Matches <match>"green/NN"</match>
	<regexp><VB.*></regexp>
		Matches <match>"eating/VBG"</match>
		Matches <match>"ate/VBD"</match>
	<regexp><IN><DT><NN></regexp>
		Matches <match>"on/IN the/DT car/NN"</match>
	<regexp><RB>?<VBD></regexp>
		Matches <match>"ran/VBD"</match>
		Matches <match>"slowly/RB ate/VBD"</match>
\t<regexp><\#><CD> # This is a comment...</regexp>\n		Matches <match>"#/# 100/CD"</match>
</hangindent>)TagsrS   zB<h1>Part of Speech Tags:</h1>
<hangindent><<TAGSET>></hangindent>
redz#a00
foregroundgreenz#080	highlightz#ddd
background	underlineT)r\   h1indent   lmargin1lmargin2
hangindentr   <   varz#88fregexpz#ba7matchz#6a6      g?g{Gz?g{Gz?(      z#efegroove   word)widthheightr[   highlightbackgroundhighlightthicknessreliefborderwrapz#555)	ro   rp   r[   rq   rW   rr   rs   rt   ru   F   
   z#eef)   )	ro   rp   r[   rq   rr   rs   rt   ru   tabsz#9bb)r[   rs   rt   	helveticaifamilysizez#777   )r[   padxpadyrt   i,  i  )r[   rq   rr   rs   rt   ro   rp   )r[   activebackgroundrq   z#aba   c                     t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }|j                         }t        j                  dd|      }|S )	N((\\.|[^#])*)(#.*)?\1z + z\n\s+z\nz	([^\\])\$z\1\\$)resubstrip)selfgrammars     M/var/www/openai/venv/lib/python3.12/site-packages/nltk/app/chunkparser_app.pynormalize_grammarz RegexpChunkApp.normalize_grammar4  s[    &&/@&&sG,&&5'2--/&&x9    Nc                 &   || _         || j                  }|| _        |C|dk(  rt        j                  d      }n(|dk(  rt        j                         }nt        d|z        d| _        	 || _        	 d| _	        	 d| _
        	 || _        	 || _        	 d| _        	 d| _        	 g | _        	 d| _        	 d| _        	 d| _        	 d| _        	 t)        |      | _        	 t-               x}| _        |j1                  d	       |j3                  d
       |j5                  d| j6                         t9        |      | _        | j:                  j=                  d       | j?                  |       | jA                  |       | jC                  |       | jE                  |       | jF                  jI                          |r;| jF                  jK                  d|dz          | jF                  jM                  dd       | jO                  d       | jQ                          y)a  
        :param devset_name: The name of the development set; used for
            display & for save files.  If either the name 'treebank'
            or the name 'conll2000' is used, and devset is None, then
            devset will be set automatically.
        :param devset: A list of chunked sentences
        :param grammar: The initial grammar to display.
        :param tagset: Dictionary from tags to string descriptions, used
            for the help page.  Defaults to ``self.TAGSET``.
        Nr   z	train.txttreebankzUnknown development set %sr   chunk_labelz+50+50zRegexp Chunk Parser Appz<Control-q>d   end
insert1.0))_chunk_labelTAGSETtagsetr   chunked_sentsr   
ValueErrorchunkerr   normalized_grammargrammar_changeddevsetdevset_namedevset_index_last_keypress_history_history_index_eval_grammar_eval_normalized_grammar_eval_indexr   _eval_scorer   topgeometrytitlebinddestroyr   _devset_sizeset_init_fonts_init_widgets_init_bindings_init_menubar
grammarboxfocusr   mark_setshow_devsetupdate)r   r   r   r   r   r   r   s          r   __init__zRegexpChunkApp.__init__?  s
   $ (>[[F >k)"00=:%'557 !=!KLL7)"&7 ;A&?	-  =	8  	I "	 )-%5	5 &+>	: dhX		+,- #3Kc" 	3C 3 OO""5'D.9OO$$Xu5 	r   c                 j    |j                  d j                         |j                  d j                         |j                  d j                         |j                  d j                         |j                  d fd       |j                  d fd        j
                  j                  d j                          j
                  j                  d j                          j
                  j                  d j                          j                  j                  d	 j                         y )
Nz<Control-n>z<Control-p>z<Control-t>z
<KeyPress>z<Control-s>c                 $    j                         S N)save_grammarer   s    r   <lambda>z/RegexpChunkApp._init_bindings.<locals>.<lambda>      $*;*;*=r   z<Control-o>c                 $    j                         S r   )load_grammarr   s    r   r   z/RegexpChunkApp._init_bindings.<locals>.<lambda>  r   r   z<Configure>)r   _devset_next_devset_prevtoggle_show_tracer   r   evalbox
_eval_plotr   r   s   ` r   r   zRegexpChunkApp._init_bindings  s     1 12 1 12 6 67t{{+ => =>]D,B,BC]D,=,=>]D,=,=> 	-9r   c                 $   t        |      | _        | j                  j                  d       t        d| j                  j	                                | _        t        dt        | j                  j	                         dz  dz               | _        y )Nr_   rz   r{      )r   _sizer   r   get_fontint
_smallfontr   s     r   r   zRegexpChunkApp._init_fonts  sg    C[


rDJJNN4D3DE
s4::>>+;b+@B+F'G%H
r   c                    t        |      }t        |d      }|j                  dd| j                         |j                  ddd| j                         |j                  ddd	| j                         |j                  d
d| j
                         |j                  dd| j                  d       |j                  dd|       t        |d      }|j                  d| j                  dd| j                         |j                  d| j                  dd| j                         |j                  d| j                  dd| j                         |j                  d| j                  dd| j                         |j                  d| j                  dd| j                         |j                  dd|       t        |d      }|j                  d| j                  d| j                          |j                  d!| j                  d"| j                          |j                  d#| j                  d$| j                          |j                  d%| j                  d&| j                          |j                  d'd|       t        |d      }|j                  d(d| j                         |j                  d)d|       |j                  |*       y )+Nr   )tearoffzReset Application)labelr\   commandzSave Current GrammarzCtrl-s)r   r\   acceleratorr   zLoad GrammarzCtrl-ozSave Grammar History   Exitrh   zCtrl-q)r   r\   r   r   File)r   r\   menuTinyrw   )r   variabler\   valuer   Small   Mediumr_   Large   Huge"   Viewz50 sentences2   )r   r   r   r   z100 sentencesr   z200 sentences   z500 sentencesi  zDevelopment-SetAboutrN   )r   )r	   add_commandresetr   r   save_historyr   add_cascadeadd_radiobuttonr   resizer   set_devset_sizeaboutconfig)r   parentmenubarfilemenuviewmenu
devsetmenuhelpmenus          r   r   zRegexpChunkApp._init_menubar  s   v,+#6!TZZX( %%	 	 	
 	  %%	 	 	
 	(B@Q@Q 	 	
 	At|| 	 	
 	&AHE+  ZZKK 	! 	
 	  ZZKK 	! 	
 	  ZZKK 	! 	
 	  ZZKK 	! 	
 	  ZZKK 	! 	
 	&AHE'1-
"" &&((	 	# 	
 	""!&&((	 	# 	
 	""!&&((	 	# 	
 	""!&&((	 	# 	
 	"3qzR+7aL&AHE7#r   c                 ^    | j                   r| j                          y| j                          y)Nbreak)_showing_tracer   
show_tracer   r   s     r   r   z RegexpChunkApp.toggle_show_trace#  s+      OOr      Fc                 
   |j                  d| j                  j                               }|j                  d| j                  j                               }| j                  j	                  d       | j                  j                  d|dz  dz
  ddd	      }| j                  j                  |      d   d
z   |dz
  }}| j                  j                  |||z
  dz  z   |dz
  ddd      }d| j                  j                  |      d   dz
  }	}| j                  d   }
| j                  j                  | j                  j                  dd|dz
  d|
|
             | j                  j                  | j                  j                  d|	dz   dd|
|
             | j                  j                         rt        | j                        dkD  rdx}}dx}}t        dt        t        | j                        | j                  dz               D ]G  }| j                  |    \  }}}}t        ||      }t        ||      }t!        ||      }t!        ||      }I t!        |dz
  d      }t!        |dz
  d      }t        |dz   d      }t        |dz   d      }ndx}}dx}}t        d      D ]  }|||z
  |dz  |z
  ||z
  z  z  z   }|	|	|z
  |dz  |z
  ||z
  z  z  z
  }||cxk  r|k  r#n n | j                  j#                  ||||	d       ||cxk  r|	k  skn n| j                  j#                  ||||d        | j                  j#                  ||||	       | j                  j#                  ||	||	       | j                  j                  |dz
  |	dddd|z  z  	       | j                  j                  |dz
  |dddd|z  z  	       | j                  j                  ||	dz   dddd|z  z  	       | j                  j                  ||	dz   dddd|z  z  	       d x}}t%        | j                        D ]6  \  }\  }}}}|||z
  ||z
  ||z
  z  z  z   }|	|	|z
  ||z
  ||z
  z  z  z
  }|| j&                  k(  rU| j                  j)                  |dz
  |dz
  |dz   |dz   d d!       d"|dz  z  d#|dz  z  z   d$|dz  z  z   | j*                  d%<   nF| j                  j                  | j                  j)                  |dz
  |dz
  |dz   |dz   d&d'             |S| j,                  j                         r9| j                  j                  | j                  j#                  ||||d'             ||}}9 y )(Nro   rp   allrw   rm   leftw	Precision)justifyanchortextr   sRecallcenter)r  r  r  rh   r[   r   i  )filloutlineg{Gz?   g      $@z#888)r
  r~   rightsez%d%%r   nenwz#0f0z#000zPrecision: %.2f%%	zRecall: %.2f%%	zF-score: %.2f%%r  #afaz#8c8)r   r   winfo_widthwinfo_heightdeletecreate_textbbox_EVALBOX_PARAMSlowercreate_rectangle
_autoscalelenr   rangemin_SCALE_Nmaxcreate_line	enumerater   create_ovalstatus_eval_lines)r   r   r   ro   rp   tagr  r  r   botbgmax_precision
max_recallmin_precision
min_recallir   	precisionrecallfmeasurexyprev_xprev_y_fscores                             r   r   zRegexpChunkApp._eval_plot-  s   

7DLL$<$<$>?Hdll&?&?&AB 	E" ll&&!b &; ' 
 ll'',Q/!3URZell&&EDLQ&&RK ' 
 t||((-a025S !!,/LL))!Qq$RQS)T	
 	LL))!S1WdDrSU)V	

 ?? S%7!%;)**MJ)**MJ1c#dmm"4dmma6GHI7;}}aR7H4FH #I} = 4
 #I} = 4
 J$   4a8MZ$.2J 4a8MZ$.2J)**MJ)**MJ rATJ&:
+BC A sSyTM)mm.KL A a%((CCf(EQ}}((q%(H  	  sD#6  sE37 	  1H3./ 	! 	
 	  1H3./ 	! 	
 	  !G3+, 	! 	
 	  !G3+, 	! 	
 1:4==1I-A-9ff*$j)@A A sSy]*}}/LM A D'''((E1q5!a%QVV )  *Y_=(FSL9:'6C<89 F# ""LL,,Aq1ua!eQU - 
 !d&6&6&:&:&<""LL,,VVQ,O FF5 2Jr   c                    | j                   y | j                  d| _        y t        j                         }t        j                         | j                  z
  | j
                  k  r\| j                  | j                  k7  rCd| _        | j                   j                  t        | j                  dz        | j                        S | j                  | j                  k7  r| j                  D ]  \  }}}}| j                  | j                  |      k(  s'| j                  j                  ||||f       t        | j                        dz
  | _        | j#                          d| _        d | _         y  d| _        t'        | j(                        | _        | j,                  | _        | j                  | _        | j                  j1                         dk(  rd| _        y | j2                  | j$                  t5        | j$                  | j6                  z   | j8                  j;                                D ]=  }| j=                  |j?                               }| j*                  jA                  ||       ? | xj$                  | j6                  z  c_        | j$                  | j8                  j;                         k\  r| j                  j                  | j.                  | j*                  jC                         | j*                  jE                         | j*                  jG                         f       t        | j                        dz
  | _        | j#                          d| _        d | _        y d| j$                  z  | j8                  j;                         z  }d	|z  | jH                  d
<   d| _        | jK                  t        j                         |z
         | j                   j                  t        | j                  dz        | j                         y )NFTi  rh   r   r    r   z$Evaluating on Development Set (%d%%)r  )&r   r   _eval_demon_runningtimer   _EVAL_DELAYr   r   afterr   
_EVAL_FREQ_eval_demonr   r   appendr  r   r   r   r   r   r   r   r   r   r   r  _EVAL_CHUNKr   r   _chunkparseleavesscorer-  r.  	f_measurer#  _adaptively_modify_eval_chunk)	r   t0gprfgoldguessprogresss	            r   r=  zRegexpChunkApp._eval_demon  s<   88<<',D$ YY[ IIK$---0@0@@''4+H+HH'+D$88>>#doo&<"=t?O?OPP ""d&C&CC #mm
1a**d.D.DQ.GGMM((!Q16*-dmm*<q*@D'OO%/4D,48D1 ,  !D)d6G6GHD!%D,0,C,CD) ""((*b0',D$ KKs  4#3#33T5F5F5J5J5L 
D
 $$T[[]3E""4/
 	D,,, t004466MM  &&$$..0$$++-$$..0	 #&dmm"4q"8DOO',D$,0D)T---0A0A0E0E0GGH"H8"SDKK'+D$..tyy{R/?@HHNN3t568H8HIr   c           
         || j                   kD  rh| j                  dkD  rYt        | j                  dz
  t        t	        | j                  | j                   |z  z        | j                  dz
              | _        y|| j
                  k  rYt        | j                  dz   t        t	        | j                  | j
                  |z  z        | j                  dz               | _        yy)z
        Modify _EVAL_CHUNK to try to keep the amount of time that the
        eval demon takes between _EVAL_DEMON_MIN and _EVAL_DEMON_MAX.

        :param t: The amount of time that the eval demon took.
        r   rh   rw   N)_EVAL_DEMON_MAXr?  r  r  r   _EVAL_DEMON_MIN)r   ts     r   rD  z,RegexpChunkApp._adaptively_modify_eval_chunk  s     t###(8(81(<"  1$((D,@,@1,DEF$$r) D %%%"  1$((D,@,@1,DEF$$r) D &r   c                     t        |fi  j                  }|j                  dd       |j                  dd       |j                  dd       |j                  dd       t	        |fd j
                  i j                   _        t        | j
                  d	d
 j                  d          _	         j                  j                  ddd        j                  j                  ddd       t        | j                  j                        }|j                  ddd        j                  j                  |j                          j                  d   }t        ||      }|j                  ddd       t        |fd j                   d j"                  j%                  d       t        |fd j&                  d j"                  j%                  d       t	        |fd j(                  i j*                   _         j,                  j                  ddd       i  _         j                  d   }t        ||      }|j                  ddd       t1         j2                        D ]  \  }\  }}	}
t        || j(                        }|j                  |dz  dd       |j5                  d|f fd	       | j.                  |<   t        |d j6                  |      j                  |dz  dz   d         j.                   j2                  d   d      j9                   j
                           j,                  j;                  d!d"#        j<                  D ]%  \  }}  j,                  j:                  d$|z  fi | '  j?                   j2                  d   d          t        | j,                  j                        } j,                  j                  |j                         |j                  ddd       t        | j                  d         }t	        |fd j
                  i j@                   _!         jB                  j%                  d"d%&       t        | j
                  d'd( j@                  d   )       _"         jD                  j                  ddd       |j                  ddd       t        | jF                         _$         jH                  j                  ddd       t        | jB                  jJ                  d*+       _&         jL                  j                   jB                  d,<    jL                  j%                  d-d./        j                  d   }t        ||      }|j                  dd0d       t        |fd1 jN                  d j"                  j%                  d       t        |fd2 jP                  d j"                  j%                  d       t        |fd3 jR                  d4d5 j"                   _*         jT                  j%                  d(       t        |fd6 jV                  d j"                   _,         jX                  j%                  d(       t[        |fi  j\                   _/        t        | j
                  d7d( j\                  d   )      }|j                  ddd        j^                  j                  dddd8        j                  d   }t        ||      }|j                  dd0d       ta         jb                         _2         jd                  j                  d9       tg        |f jd                   jh                  d:d; j"                  j%                  d       ta         jb                         _5         jj                  j                  d9       tg        |f jj                   jh                  d<d; j"                  j%                  d       t        |fd=d>i j"                  j%                  d(       t        |fd j
                  i jl                   _7         jn                  j                  dd?d@dddA       d4 j,                  dB<   d4 jB                  dB<    j                  d   }t        |dCd|      j                  dd       t        |ddC|      j                  dd       t        |dDd|      j                  ddE       |j%                  d%d"F        jB                  j;                  dGdHdIJ        jB                  j;                  dKdIdLM        jB                  j;                  dNdO        jB                  j;                  dPdQdRS        jB                  j;                  dTdUdRV        jB                  j;                  dWdLX        j                  j;                  dWdY        j                  j;                  dZd[X        j                  j;                  d\d]X        j                  j;                  d^d_X        j                  j;                  d`ddab       y )cNr      )weightr~   rm   rh   r   fontGrammar:blackr[   )rT  r  highlightcolorr[   SW)columnrowstickyNEWS)r   NWS)yscrollcommandrZ   EWzPrev Grammar)r  r   r  )sidezNext Grammar)r  rT  Sz<ButtonPress>c                 &    j                  |      S r   )	show_help)r   tabr   s     r   r   z.RegexpChunkApp._init_widgets.<locals>.<lambda>N  s    4>>#;Nr   )rp   ro   r[   )rY  rZ  )rT  elideT)re  tag-%sboth)expandr
  zDevelopment Set:r  )rT  r  r  r[   horiz)r   orientxscrollcommandbottomr0  )r`  r
     zPrev Example (Ctrl-p)zNext Example (Ctrl-n)zShow exampledisabled)r  r   statez
Show tracezEvaluation:)rY  rZ  r[  
columnspanFZoom)r   r   r  Linesr  History	   NEW)rY  rZ  r[  r   r   rp  ro  rw   r      )r
  rh  true-posr  True)r[   r\   	false-negz#800)r\   rW   	false-posz#faatracez#666none)rW   ru   
wrapindentrx   )rb   ru   errorrV   z#feccommentz#840anglez#00fbracez#0a0rc   rj   r`   )8r   _FRAME_PARAMSgrid_columnconfiguregrid_rowconfigurer   r   _GRAMMARBOX_PARAMSr   r   grammarlabelgridr
   yviewr   r   r   _history_prev_BUTTON_PARAMSpack_history_nextr   _HELPBOX_PARAMShelpboxhelptabsr!  HELPr   _HELPTAB_SPACER	configure
tag_configHELP_AUTOTAGrc  _DEVSETBOX_PARAMS	devsetboxdevsetlabel_devset_scrolldevset_scrollxviewdevset_xscrollr   r   r   devset_buttonr   trace_buttonr   r  r   r   r   r  r   r   r$  _STATUS_PARAMSr#  )r   r   frame0grammar_scrollbarr'  frame3helptab_framer,  rd  tabstopsr  r   r%  paramshelp_scrollbarframe4frame1frame2s   `                 r   r   zRegexpChunkApp._init_widgets  s	   s1d001##Aa#0##Aa#0  1 -  1 - vRDJJR$:Q:QR!"..|<
 	aQt<A1V< &fdoo6K6KLaQu=.?.C.CD -v"-1!D1	
&&	
 !!		

 $F$
	
&&	
 !!		

 $F$
 FQQD<P<PQ&9-f4!48(1$))(<$A$Xt-cHEJJa!e3J7 JJc(NO!&DMM#at/C/CPRd!a%!)d+ )= 	diil1o&00djj0At4,,KC#DLL##HsN=f= -tyy|A' #64<<3E3EF>+=+=>1!E: v$*<*<\*JKfP4::P9O9OP4f5 #--l;
 	QAd;1!F3 'vt7J7JKqa>'DNN00
 ,0+>+>+B+B'(  hS 9 -v"-1!D1	
(%%	
 !!		

 $F$
	
(%%	
 !!		

 $F$
#
$$	

 !!
 	W-"
%t
BFBUBU
 	G, f=(<(<=++L9
 	

!4
0&QG -v"-1!D1 *E"	
__OO		

 !!	
 $F$
!$((+U#	
%%OO		

 !!	
 $F$
v=I=)<)<=BBBP FKKt7J7JKqQQSTU !+W",w -fRqR8==QA=NfQbR8==QA=NfQaB7<<A1<M 	- 	!!*6!R!!+F!S!!+&!A!!'f6!J!!,&!I!!'f!= 	""7v">""9"@""7v">""7v">""<!b"Ir   c           	         d| _         d| j                  d<   d| j                  d<   d| j                  d<   | j                  j	                  dd       d| j
                  dz   | j                  j                         fz  | j                  d	<   | j                  :| j                  j                  dd
       | j                  j                  ddd       y | j                  | j
                     }| j                  j                         }d}dg}t        |j                               D ]*  \  }\  }}|d|z  z  }|j!                  t#        |             , t%        t#        |      dz         D 	
ci c]#  }	t%        t#        |            D ]
  }
|	|
f||
    % c}
}	| _        t%        t#        |      dz         D 	ci c]  }	|	|	dz  dz    c}	| _        t%        t#        |      dz         D ]r  }	|	dk(  r:| j                  j                  dd       | j                  j                  ddd       nB| j                  j                  dd||	dz
     z         | j                  j                  ddd       | j                  j                  d|dz          | j                  j                  ddd       t+        |d |	       }| j-                  |j                               }| j/                  |      }| j/                  |      }|j1                  |      D ]  }| j3                  |	|d        ||z
  D ]  }| j3                  |	|d        ||z
  D ]  }| j3                  |	|d        u | j                  j                  dd       | j                  j                  ddd       | j4                  j7                  d| j8                  j:                  dd       y c c}
}	w c c}	w )NTrn  ro  normalr   r   Development Set (%d/%d)rh   r  z#Trace: waiting for a valid grammar.r~  	z%s rm   r   zStart:
r{  zend -2c linestartzend -2cz
Apply %s:
r   r}  rw  ry  rz  z
Finished.
r   g333333?)r   r  r  r  r  r   r   r   r  r   r   tag_addr   rulesr!  rA  r>  r  r  charnumlinenumr   r@  _chunksintersection_color_chunkr   r;  r  r   )r   r   	gold_treer  tagseqr  wordnumrn   posr,  jr   	test_treegold_chunkstest_chunkschunks                   r   r   zRegexpChunkApp.show_trace  sp   "%/'"&.7#"*weU+#<"D$5$5$9$9$;<$
  <<NN!!%)NONN""7E59KK 1 12	""$ #$-i.>.>.@$A G[dCeck!FNN3v;' %B
 3u:>*
*3w<( FGAJ( *

 /4CJN.CD.C1q519.CDs5zA~&AAv%%eZ8&&w0CYO%%e]U1q5\-IJ&&w0CYONN!!%$7NN""<1DiP'bq	2G(()9)9);<I,,y1K,,y1K$11+>!!!UJ7 ?${2!!!UK8 3${2!!!UK8 3) ', 	e]3w(;YG
 	sD//33Q<G

 Es   ((O2O	c                 z   d| j                   d<   | j                   j                  dd       | j                  D ]  \  }}}||k(  r|j                  ddj	                  d t        t        | j                  j                               d 	      D                    } | j                  |   j                  di | j                   | j                   j                  |
       | j                   j                  d|dz          d}| j                  D ]  \  }}d| d| d}t        j                  ||      D ]  }	| j                   j!                  d||	j#                  d      z  ||	j%                  d      z         | j                   j!                  d|z  ||	j#                  d      z  ||	j%                  d      z         | j                   j!                  d||	j#                  d      z  ||	j%                  d      z            | j                  |   j                  di | j&                    d| j                   d<   y )Nr  ro  r   r   z
<<TAGSET>>r   c              3   &   K   | ]	  }d |z    yw)z	%s	%sN ).0items     r   	<genexpr>z+RegexpChunkApp.show_help.<locals>.<genexpr>  s!      %D #T)%   c                 X    t        j                  d| d         xr d| d   fxs d| d   fS )Nz\w+r   rh   )r   rg   )t_ws    r   r   z*RegexpChunkApp.show_help.<locals>.<lambda>"  s8    BHHVSV,D -,!"CF-+ !3q6{-+r   )key)ry   z



















z1.0 + %d charsz(?s)(<z
>)(.*?)(</z>)re  rh   rf  rm   r~   rn  r  )r  r  r  replacejoinsortedlistr   itemsr  r   _HELPTAB_FG_PARAMSr   r  r   finditerr  startr   _HELPTAB_BG_PARAMS)
r   rd  namer  r  Cr%  r  patternms
             r   rc  zRegexpChunkApp.show_help  s    (WE5)$(II D(Ds{|| II $* !2!2!45!+%  +d#**ET-D-DE###2##E4)+;<$#'#4#4KC &se:cU"=G[[$7,,Wa!''!*na!%%PQ(lS,,$sNA
NAaL ,,Wa!''!*na!%%PQ(lS 8 $5 +d#**ET-D-DE9 %.: !+Wr   c                 @    | j                  | j                  dz
         yNrh   r   _view_historyr   r   s     r   r  zRegexpChunkApp._history_prev9      4..23r   c                 @    | j                  | j                  dz          yr  r  r   s     r   r  zRegexpChunkApp._history_next=  r  r   c                 6   t        dt        t        | j                        dz
  |            }| j                  sy || j                  k(  ry d| j
                  d<   | j
                  j                  dd       | j
                  j                  d| j                  |   d          | j
                  j                  dd       || _        | j                  | j                  |   d          | j                  | j                  |   d         | _        | j                  r<| j                  j                  d      D cg c]  }t        j                  |       }}ng }t        |      | _        | j#                          | j%                          | j&                  r| j)                          | j                  t        | j                        dz
  k  r@d	j+                  | j                  dz   t        | j                              | j,                  d
<   y d| j,                  d
<   y c c}w )Nr   rh   r  ro  r   r   r   r   zGrammar {}/{}:r  rU  )r  r  r  r   r   r   r  r   r   _syntax_highlight_grammarr   r   splitr   
fromstringr   r   r   _highlight_devsetr   r   formatr  )r   indexliner  s       r   r  zRegexpChunkApp._view_historyA  s   As3t}}-159:}}D''' $, ue,udmmE&:1&=>  51#&&t}}U';A'>?"&"8"8u9Ma9P"Q"" !3399$??D  **40?  
 E(/ OOT]]!3a!77(8(?(?##a'DMM")Df%
 )3Df%)s   4Hc                 *    | j                  ddd       y)Nscrollrh   pager   r  r   s     r   r   zRegexpChunkApp._devset_nextj  s    Ha0r   c                 *    | j                  ddd       y)Nr  r   r  r   r  r   s     r   r   zRegexpChunkApp._devset_prevn  s    Hb&1r   c                 `    | j                   y | j                   j                          d | _         y r   )r   r   r   s     r   r   zRegexpChunkApp.destroyr  s&    88r   c                    d}| j                   }|dk(  r?|d   j                  d      r+| j                  | j                  t	        |d         z          n|dk(  rB|d   j                  d      r.| j                  | j                  |t	        |d         z  z          nT|dk(  rB| j                  t	        t        |d         | j                  j                         z               nJ d| d|        |r| j                          y y )	Nrh   r  unitr   r  movetozbad scroll command r   )	r   
startswithr   r   r   floatr   r   r   )r   r   argsNshowing_traces        r   r  zRegexpChunkApp._devset_scrollx  s    ++h47#5#5f#=T..T!W=> T!W%7%7%?T..Sa\1AAB StAw$2C2C2G2G2I!IJK<+G9AdV<<1OO r   c                    || j                   }t        t        d|      | j                  j	                         dz
        }|| j                   k(  r| j
                  sy || _         d| _        d| j                  d<   d| j                  d<   d| j                  d<   d| j                  d<   | j                  j                  d	d
       d| j                   dz   | j                  j	                         fz  | j                  d<   | j                  | j                   | j                   dz    }i | _        ddi| _        t        |      D ]  \  }}d}t        |j                               D ]J  \  }\  }}t!        |      | j                  ||f<   || d| dz  }t!        |      | j                  ||dz   f<   L | j                  j#                  d
|d d dz           | j$                  | j'                          d| j                  d<   | j                   | j                  j	                         z  }	| j                   dz   | j                  j	                         z  }
| j(                  j+                  |	|
       y )Nr   rh   Fr  ro  rn  rn   ru   r   r   r  r  r7  /r   r   z

rm   )r   r  r  r   r   r   r  r  r  r  r  r   r  r  r!  rA  r  r   r   r  r  r   )r   r  samplesentnumsentlinestrr  rn   r  firstlasts              r   r   zRegexpChunkApp.show_devset  sC   =%%E C5M4#4#4#8#8#:Q#>?D%%%d.A.A!#%-'"&07# #+w!'veU+#<"D$5$5$9$9$;<$
 
 T..1B1BQ1FG1v&v.MGTG(1$++-(@$$14WWg-.dV1SE++58\Wgk12 )A NN!!%")>? / <<#""$",w !!D$5$5$9$9$;;!!A%):):)>)>)@@ud+r   c                     t               }d}|D ]b  }t        |t              rK|j                         | j                  k(  r|j                  ||t        |      z   f       |t        |      z  }^|dz  }d |S )Nr   rh   )r   
isinstancer   r   r   addr  )r   treechunksr  childs        r   r  zRegexpChunkApp._chunks  sn    E%&;;=D$5$55JJ3u:)=>?3u:%1  r   c                    | j                   y | j                  j                  ddd       | j                  j                  ddd       | j                  j                  ddd       | j                  j                  ddd       t	        |j                  d            D ]O  \  }}|j                         st        j                  d|      }d }|j                  d	      r`|j                  d	      }d
|dz   |j                  d	      fz  }d
|dz   |j                  d	      fz  }| j                  j                  d||       t        j                  d|      D ]  }||j                         |k\  r d
|dz   |j                         fz  }d
|dz   |j                         fz  }|j                         dv r| j                  j                  d||       z| j                  j                  d||        R y )Nr  r   r   r  r  rc   r   z(\\.|[^#])*(#.*)?rm   z%d.%drh   z[<>{}]z<>)r   r   
tag_remover  r!  r  r   r   rg   groupr  r   r  )r   r   linenor  r  comment_startr  r   s           r   r  z(RegexpChunkApp._syntax_highlight_grammar  s   88""9eU;""7E59""7E59eU;%gmmD&9:LFD::<-t4A Mwwqz !
vz1771:66vz155844''	1a8[[40 ,m1Kvz177955vz155733779$OO++GQ:OO++GQ: 1 ;r   c           	         | j                   y | j                  j                  ddd       g | _        t	        |j                  d            D ]E  \  }}t        j                  dd|      }|j                         }|s0	 t        j                  |       G d
| j                  d<   y # t        $ r3}| j                  j                  dd|dz   z  d	|dz   z         Y d }~d }~ww xY w)Nr~  r   r   r   r   r   z%s.0rh   z%s.0 lineendr7  r  )r   r   r  _grammarcheck_errsr!  r  r   r   r   r   r  r   r  r#  )r   r   r  r  r   s        r   _grammarcheckzRegexpChunkApp._grammarcheck  s    88""7E59"$%gmmD&9:LFD660%>D::<D#..t4 ; !F	 " OO++6A:!6&ST*8U s   =B$$	C -)CC c                 N   |rt        j                          | _        | j                  j                  dd      x| _        }| j                  |      }|| j                  k(  ry || _        | j                  t        | j                        dz
  k  rd| j                  d<   | j                  |       	 |r2|j                  d      D cg c]  }t        j                  |       }}ng }t%        |      | _        | j                  j'                  ddd       t        j                          | _        | j*                  r| j-                          n| j/                          | j0                  s| j3                          y y c c}w # t        $ r"}| j!                  |       d | _        Y d }~y d }~ww xY w)Nr   r   rh   rU  r  r   r~  )r9  r   r   r   r   r   r   r   r  r   r  r  r  r   r  r   r  r   r   r  r   r   r   r  r8  r=  )r   eventr   r   r  r  r   s          r   r   zRegexpChunkApp.update  s{   "&))+D "&!4!4UE!BBw "33G<!8!88&8D# T]]!3a!77(2Df%&&w/	! !3 8 8 > > $..t4 >  
  )/""7E59#yy{OO""$'' (+  	w'DL		s*   -E9 E4E9 4E9 9	F$FF$c                 p   |&| j                   | j                  | j                  dz    }| j                  j                  ddd       | j                  j                  ddd       | j                  j                  ddd       t	        |      D ]  \  }}| j                  |j                               }| j                  |      }| j                  |      }|j                  |      D ]  }| j                  ||d        ||z
  D ]  }| j                  ||d        ||z
  D ]  }| j                  ||d         y )Nrh   rw  r   r   ry  rz  )
r   r   r  r  r!  r@  rA  r  r  r  )r   r  r  r  r  r  r  r  s           r   r  z RegexpChunkApp._highlight_devset  s   >[[!2!2T5F5F5JKF!!*eU;!!+ue<!!+ue< #,F"3GY(()9)9);<I,,y1K,,y1K$11+>!!'5*= ?${2!!'5+> 3${2!!'5+> 3 #4r   c                     	 | j                   j                  |      S # t        t        f$ r)}| j                  j                  ddd       |cY d }~S d }~ww xY w)Nr~  r   r   )r   parser   
IndexErrorr   r  )r   wordsr   s      r   r@  zRegexpChunkApp._chunkparse5  sN    	<<%%e,,J' 	 OO##GUE:L	s    AA
AAc           	          |\  }}| j                   j                  || j                  |    d| j                  ||f    | j                  |    d| j                  ||f   dz
          y )NrF   rh   )r  r  r  r  )r   r  r  r%  r  r   s         r   r  zRegexpChunkApp._color_chunk@  sq    
s||G$%Qt||GUN'C&DE||G$%Qt||GSL'AA'E&FG	
r   c                     d | _         d | _        d | _        d| _        g | _        d| _        | j                  j                  dd       | j                  d       | j                          y )Nr   r   r   )
r   r   r   r   r   r   r   r  r   r   )r   s    r   r   zRegexpChunkApp.resetH  sZ    "& ue,r   z# Regexp Chunk Parsing Grammar
# Saved %(date)s
#
# Development set: %(devset)s
#   Precision: %(precision)s
#   Recall:    %(recall)s
#   F-score:   %(fscore)s

%(grammar)s
c                    |sddg}t        |d      }|sy | j                  rL| j                  | j                  | j                  d   d         k(  rd | j                  d   dd  D        \  }}}n| j                  d	x}x}}nd
x}x}}t        |d      5 }|j                  | j                  t        t        j                         | j                  |||| j                  j                               z         d d d        y # 1 sw Y   y xY w)NzChunk Gramamr.chunkz	All files*r  	filetypesdefaultextensionr   r   c              3   ,   K   | ]  }d d|z  z    yw)z%.2f%%r   Nr  )r  vs     r   r  z.RegexpChunkApp.save_grammar.<locals>.<genexpr>j  s      ).CC!G$.Cs   rh   zGrammar not well formedzNot finished evaluation yetr  )dater   r-  r.  r5  r   )r   r   r   r   r   openwriteSAVE_GRAMMAR_TEMPLATEdictr9  ctimer   r   r   )r   filenameftypesr-  r.  r5  outfiles          r   r   zRegexpChunkApp.save_grammara  s	   13EFF(6HUH==T448N8NMM"a 9
 
).2mmB.?.C)%Ivv \\!*CCIC*GGIG(C GMM**++'!! LL..0
 !  s   A"C??Dc                    |sddg}t        |d      }|sy | j                  j                  dd       | j                          t	        |      5 }|j                         }d d d        t        j                  dd      j                         }| j                  j                  d|       | j                          y # 1 sw Y   [xY w)	Nr  r  r  r  r   r   z2^\# Regexp Chunk Parsing Grammar[\s\S]*F-score:.*
r7  )
r   r   r  r   r  readr   r   lstripr   )r   r  r   infiler   s        r   r   zRegexpChunkApp.load_grammar  s    13EFF&(SHue,(^vkkmG &&Er7

&( 	 	ug. ^s   B99Cc           	         |sddg}t        |d      }|sy t        |d      5 }|j                  d       |j                  dt        j                         z         |j                  d| j
                  z         t        | j                        D ]  \  }\  }}}}d	|d
z   t        | j                        |dz  |dz  |dz  fz  }	|j                  d|	z         |j                  dj                  d |j                         j                         D                      | j                  r.| j                  | j                  | j                  d   d         k(  s|| j                  |j                  d       n|j                  d       |j                  dj                  d | j                  j                         j                         D                     d d d        y # 1 sw Y   y xY w)N)zChunk Gramamr History.txtr  r'  r  r  z'# Regexp Chunk Parsing Grammar History
z# Saved %s
z# Development set: %s
z>Grammar %d/%d (precision=%.2f%%, recall=%.2f%%, fscore=%.2f%%)rh   r   z
%s
r7  c              3   &   K   | ]	  }d |z    ywz  %s
Nr  r  r  s     r   r  z.RegexpChunkApp.save_history.<locals>.<genexpr>  s     %TBS$hoBSr  r   r   z#
Current Grammar (not well-formed)
z!
Current Grammar (not evaluated)
c              3   &   K   | ]	  }d |z    ywr)  r  r*  s     r   r  z.RegexpChunkApp.save_history.<locals>.<genexpr>  s     U8THtO8Tr  )r   r  r  r9  r  r   r!  r   r  r  r   r  r   r   r   r   )
r   r  r   r!  r,  rF  rG  rH  rI  hdrs
             r   r   zRegexpChunkApp.save_history  s   79KLF(6FSH(C GMMDEMM.4::<78MM3d6F6FFG#,T]]#;<Aq!Q%1uc$--0!c'1s7AGLM 
 hn-bgg%T!'')//BS%TTU $< ++))$--*;A*>?@ <<'MM"IJMM"GHGGU8J8J8L8R8R8TUU- !  s   F.GG#c                     d}d}	 ddl m}  |||      j                          y #  t        | j                  ||       Y y xY w)Nz<NLTK RegExp Chunk Parser Application
Written by Edward Loperz2About: Regular Expression Chunk Parser Applicationr   )Message)messager   )tkinter.messageboxr.  showr   r   )r   r   ABOUTTITLEr.  s        r   r   zRegexpChunkApp.about  s>    TD	-2E/446	-TXXue,s	   % A c                 "   || j                   j                  |       | j                   j                  t        t        | j                        | j                   j                                      | j                  d       | j                  d       y )Nrh   r   )r   r   r  r  r   r   r   r   r}   s     r   r   zRegexpChunkApp.set_devset_size  sk    !!$'c#dkk"2D4E4E4I4I4KLMr   c                 *   || j                   j                  |       | j                   j                         }| j                  j	                  t        |              | j                  j	                  t        dt        |       dz  dz               y )N)r}   ir   r_   )r   r   r   r   r  absr   r  r5  s     r   r   zRegexpChunkApp.resize  so    JJNN4 zz~~

CI,/!!s3#d)r0AR0G'H!Ir   c                 R    t               ry | j                  j                  |i | y)z
        Enter the Tkinter mainloop.  This function must be called if
        this demo is created from a non-interactive program (e.g.
        from a secript); otherwise, the demo will close as soon as
        the script completes.
        N)r   r   mainloop)r   r  kwargss      r   r9  zRegexpChunkApp.mainloop  s%     94*6*r   )r   Nr7  NPNr   )B__name__
__module____qualname____doc__r   r  r  r  r:  r?  r<  rO  rN  r  r  r  r  _FONT_PARAMSr  r  r  _HELPTAB_BG_COLOR_HELPTAB_FG_COLORr  r  r  r   r   r   r   r   r   r  _DRAW_LINESr   r8  r=  rD  r   r   r   rc  r  r  r  r   r   r   r  r   r  r  r  r   r  r@  r  r   r  r   r   r   r   r   r   r9  r  r   r   r   r   -   s   .(.$. 	. 	h	.
 	l. 	$. 	!. 	$. 	n. 	j. 	k. 	d. 	'. 	n. 	'.  	!." 	 #.$ 	!%.& 	g'.( 	~).* 	&+., 	&-.. 	+/.0 	&1.2 	$3.4 	5.6 	7.8 	l9.: 	";.< 	&=.> 	!?.@ 	{A.B 	C.D 	 E.F +,/400[.FneDP 
'(	$&)*	df-.	dT*+	td#$	4b12	tQ45	'(	46*+	$&)*L" K+ K#JCO O "	 "
O "
 VHQGN{5LF1EM"O FN 00O	  iV:
`$D HKE"N  IJV0{Jz N==~ +D'3R+,Z
;6!"1f?.	
	 < B-J	+r   r   c                  4    t               j                          y r   )r   r9  r  r   r   apprE    s    r   __main__rE  )'r?  randomr   textwrapr9  tkinterr   r   r   r   r   r   r	   r
   r   r   tkinter.filedialogr   r   tkinter.fontr   
nltk.chunkr   r   nltk.chunk.regexpr   nltk.corpusr   r   nltk.draw.utilr   	nltk.treer   	nltk.utilr   r   rE  r<  __all__r  r   r   <module>rS     sj     	     B  4 - 1 #  e+ e+P-  zE'r   