
    gP/                         d Z ddlZddlZddlmZ ddZd Zd Zd Z	 G d d	      Z
d
 Zd ZefdZefdZi addZd Zy)z0
Utility functions and classes for classifiers.
    N)LazyMapc                      ||xr t        |d   t        t        f      }|r fd}t        ||      S t         |      S )a  
    Use the ``LazyMap`` class to construct a lazy list-like
    object that is analogous to ``map(feature_func, toks)``.  In
    particular, if ``labeled=False``, then the returned list-like
    object's values are equal to::

        [feature_func(tok) for tok in toks]

    If ``labeled=True``, then the returned list-like object's values
    are equal to::

        [(feature_func(tok), label) for (tok, label) in toks]

    The primary purpose of this function is to avoid the memory
    overhead involved in storing all the featuresets for every token
    in a corpus.  Instead, these featuresets are constructed lazily,
    as-needed.  The reduction in memory overhead can be especially
    significant when the underlying list of tokens is itself lazy (as
    is the case with many corpus readers).

    :param feature_func: The function that will be applied to each
        token.  It should return a featureset -- i.e., a dict
        mapping feature names to feature values.
    :param toks: The list of tokens to which ``feature_func`` should be
        applied.  If ``labeled=True``, then the list elements will be
        passed directly to ``feature_func()``.  If ``labeled=False``,
        then the list elements should be tuples ``(tok,label)``, and
        ``tok`` will be passed to ``feature_func()``.
    :param labeled: If true, then ``toks`` contains labeled tokens --
        i.e., tuples of the form ``(tok, label)``.  (Default:
        auto-detect based on types.)
    r   c                 $     | d         | d   fS )Nr       )labeled_tokenfeature_funcs    G/var/www/openai/venv/lib/python3.12/site-packages/nltk/classify/util.py	lazy_funcz!apply_features.<locals>.lazy_funcA   s     q!12M!4DEE    )
isinstancetuplelistr   )r	   tokslabeledr   s   `   r
   apply_featuresr      sI    B =:d1gt}=	F y$''|T**r   c                 J    t        | D ch c]  \  }}|	 c}}      S c c}}w )a!  
    :return: A list of all labels that are attested in the given list
        of tokens.
    :rtype: list of (immutable)
    :param tokens: The list of classified tokens from which to extract
        labels.  A classified token has the form ``(token, label)``.
    :type tokens: list
    )r   )tokenstoklabels      r
   attested_labelsr   I   s%     F3FLS%%F3443s   
c                    | j                  |D cg c]  \  }}|	 c}}      }t        ||      D cg c]  \  \  }}}|j                  |       }}}}t        j                  t        |      t        |      z        S c c}}w c c}}}w N)prob_classify_manyzipprobmathlogsumlen)
classifiergoldfslresultspdistlls          r
   log_likelihoodr(   U   sx    ++t,DtGRRt,DEG03D'0B	C0B,Wb!e%**Q-0BB	C88CGc"g%&& -E	Cs
   B
Bc                     | j                  |D cg c]  \  }}|	 c}}      }t        ||      D cg c]  \  \  }}}||k(   }}}}|rt        |      t        |      z  S yc c}}w c c}}}w )Nr   )classify_manyr   r   r    )r!   r"   r#   r$   r%   rcorrects          r
   accuracyr-   [   sp    &&$'?$wA$'?@G*-dG*<=*<,7BAqAv*<G=7|c'l** (@=s
   A%
A+c                       e Zd ZdZd Zd Zy)CutoffCheckerz
    A helper class that implements cutoff checks based on number of
    iterations and log likelihood.

    Accuracy cutoffs are also implemented, but they're almost never
    a good idea to use.
    c                     |j                         | _        d|v rt        |d          |d<   d|v rt        |d         |d<   d | _        d | _        d| _        y )Nmin_llmin_lldeltar   )copycutoffsabsr'   acciter)selfr4   s     r
   __init__zCutoffChecker.__init__m   s_    ||~w!$WX%6!7 7GHG#%()?%@GM"	r   c                 v   | j                   }| xj                  dz  c_        d|v r| j                  |d   k\  ryt        j                  j                  j                  ||      }t        j                  |      ryd|v sd|v rCd|v r	||d   k\  ryd|v r+| j                  r|| j                  z
  t        |d         k  ry|| _        d|v sd|v rnt        j                  j                  j                  ||      }d|v r	||d   k\  ryd|v r+| j                  r|| j                  z
  t        |d         k  ry|| _
        yy )	Nr   max_iterTr1   r2   max_accmin_accdeltaF)r4   r7   nltkclassifyutilr(   r   isnanr'   r5   r6   )r8   r!   
train_toksr4   new_llnew_accs         r
   checkzCutoffChecker.checkw   s6   ,,		Q	 TYY'*2E%E##22:zJ::fw-7":7"v1B'B(GGtww&3w}/E+FFDG>W#<mm((77
JOGG#793E(E')HH(S1H-IIDH $=r   N)__name__
__module____qualname____doc__r9   rE   r   r   r
   r/   r/   d   s    !r   r/   c                     i }d|d<   | d   j                         |d<   | d   j                         |d<   dD ]?  }| j                         j                  |      |d|z  <   || j                         v |d	|z  <   A |S )
NTalwaysonr   
startswithendswithabcdefghijklmnopqrstuvwxyz	count(%s)has(%s)lowercountnamefeaturesletters      r
   names_demo_featuresrY      s    HHZ!!W]]_H\8>>+HZ.)-););F)Cv%&'-'=V#$ / Or   c                 l   i }d|d<   | d   j                         dv |d<   | d   j                         dv |d<   dD ]w  }| j                         j                  |      |d	|z  <   || j                         v |d
|z  <   || d   j                         k(  |d|z  <   || d   j                         k(  |d|z  <   y |S )NTrK   r   aeiouyzstartswith(vowel)rM   zendswith(vowel)rO   rP   rQ   zstartswith(%s)zendswith(%s)rR   rU   s      r
   binary_names_demo_featuresr\      s    HHZ$(GMMOx$?H !"&r(.."2h">H.)-););F)Cv%&'-'=V#$.4Q.G!F*+,2d2hnn6F,F&()	 /
 Or   c           
          dd l }ddlm} |j                  d      D cg c]  }|df c}|j                  d      D cg c]  }|df c}z   }|j	                  d       |j                  |       |d d }|dd	 }t        d
        | |D 	cg c]  \  }}	 ||      |	f c}	}      }
t        d       t        |
|D 	cg c]  \  }}	 ||      |	f c}	}      }t        d|z         	 |D 	cg c]  \  }}	 ||       }}}	|
j                  |      }t        ||      D cg c]  \  \  }}}|j                  |       }}}}t        dt        |      t        |      z  z         t                t        d       t        t        ||            d d D ]A  \  \  }}}|dk(  rd}nd}t        |||j                  d      |j                  d      fz         C 	 |
S c c}w c c}w c c}	}w c c}	}w c c}	}w c c}}}w # t        $ r Y |
S w xY w)Nr   namesmale.txtmale
female.txtfemale@ i  i|  Training classifier...Testing classifier...Accuracy: %6.4fAvg. log likelihood: %6.4fMUnseen Names      P(Male)  P(Female)
----------------------------------------     %-15s *%6.4f   %6.4f  %-15s  %6.4f  *%6.4f)randomnltk.corpusr_   wordsseedshuffleprintr-   r   r   logprobr   r    r   r   NotImplementedError)trainerrW   rm   r_   rV   namelisttraintestngr!   r6   test_featuresetspdistsr"   r&   r'   genderfmts                      r
   
names_demor      s,   ! ,1;;z+BC+B4v+BC%*[[%>G%>Tx%>G H
 KK
NN8UdOEDD 

"#?v18A;*?@J 

!"
:tDtVa!a 0tD
EC	
c
!"6:;dFQHQKd;../?@?B4?PQ?P&;|dUemmD!?PQ*c"gD	.ABCAB%)#dF*;%<Ra%@!NT6E..#uzz&15::h3GHHI &A M D G @  E <Q   	sG   GG"G'
>G-)H  .G3 $H  $G9BH  3H   	HHc           
      Z   dd l }ddlm} |j                  d      }|j                  d      }|j	                  d       |j                  |       |j                  |       t        ||d d       }t        ||dd |d d z         }|dd	 D cg c]  }|d
f c}|dd D cg c]  }|df c}z   }	|j                  |	       t        d        | ||      }
t        d       t        |
|	D cg c]  \  }} ||      |f c}}      }t        d|z         	 |	D cg c]  \  }} ||       }}}|
j                  |      }t        |	|      D cg c]  \  \  }}}|j                  |       }}}}t        dt        |      t        |	      z  z         t                t        d       t        |	|      d d D ]A  \  \  }}}|d
k(  rd}nd}t        |||j                  d
      |j                  d      fz         C 	 |
S c c}w c c}w c c}}w c c}}w c c}}}w # t        $ r Y |
S w xY w)Nr   r^   r`   rb   i	 i  i	  i  i
  Ti  Fre   rf   rg   rh   ri   rj   rk   rl   )rm   rn   r_   ro   rp   rq   maprr   r-   r   r   rs   r   r    r   rt   )ru   rW   rm   r_   
male_namesfemale_namespositive	unlabeledrV   rx   r!   ry   mr6   r{   r|   r"   r&   r'   is_maler~   s                        r
   partial_names_demor      sO   !Z(J;;|,L
KK
NN:
NN<  8Z./H Hjd3l4C6HHII &0T%:;%:TT4L%:;".s3"7?"7$u"7? D NN4 

"#9-J 

!"
:tDtVa!a 0tD
EC	
c
!"6:;dFQHQKd;../?@?B4?PQ?P&;|dUemmD!?PQ*c"gD	.ABCAB&)$&7&;"OT7U$..#uzz$/E1BCCD '< E < ?  E <Q   	sC   H H*H
H H,$H H.BH H 	H*)H*c           
      N   dd l }ddlm} t        d       |t        vr5|j                  |      D cg c]  }||j                  d   f c}t        |<   t        |   d d  }|t        |      kD  rt        |      }t        |D ch c]  \  }}|	 c}}      }	t        ddj                  |	      z          t        d       |j                  d       |j                  |       |d t        d|z         }
|t        d|z        | }t        d	        | |
D cg c]  \  }} ||      |f c}}      }t        d
       t        ||D cg c]  \  }} ||      |f c}}      }t        d|z         	 |D cg c]  \  }} ||       }}}|j                  |      }t        ||      D cg c]  \  \  }}}|j!                  |       }}}}t        dt#        |      t        |      z  z         |S c c}w c c}}w c c}}w c c}}w c c}}w c c}}}w # t$        $ r Y |S w xY w)Nr   )sensevalzReading data...z
  Senses:  zSplitting into test & train...rd   g?re   rf   rg   rh   )rm   rn   r   rr   _inst_cache	instancessensesr    r   joinrp   rq   intr-   r   r   rs   r   rt   )ru   wordrW   ry   rm   r   ir   r$   r   rw   rx   r!   r6   r{   r|   rV   r"   r&   r'   s                       r
   wsd_demor   "  s!   $ 

;7?7I7I$7OP7O!a!-7OPDD!!$I3y>	N9-9!Q19-.F	,&)
)* 

*+
KK
NN9nC!G%ESq\A&D 

"#?v18A;*?@J 

!"
:tDtVa!a 0tD
EC	
c
!"6:;dFQHQKd;../?@?B4?PQ?P&;|dUemmD!?PQ*c"gD	.ABC
 E Q . @  E <Q  	sG   G3G8
G>
H1H 6H
$H ,H
'H 
H 	H$#H$c                  b    	 t          y# t        $ r} t        d      }t        |      | d} ~ ww xY w)z8
    Checks whether the MEGAM binary is configured.
    z\Please configure your megam binary first, e.g.
>>> nltk.config_megam('/usr/bin/local/megam')N)
_megam_bin	NameErrorstr)eerr_msgs     r
   check_megam_configr   P  s8    ( (<
  a'(s   	 	.).r   )i  )rI   r   nltk.classify.utilr>   	nltk.utilr   r   r   r(   r-   r/   rY   r\   r   r   r   r   r   r   r   r
   <module>r      si      *+Z	5'4 4x
 "5 ,^ *= 5p +\(r   