
    g(                     n    d Z ddlmZ ddlmZ ddlmZmZmZm	Z	  G d de      Z
d Zedk(  r e        y	y	)
a  
A classifier based on the Naive Bayes algorithm.  In order to find the
probability for a label, this algorithm first uses the Bayes rule to
express P(label|features) in terms of P(label) and P(features|label):

|                       P(label) * P(features|label)
|  P(label|features) = ------------------------------
|                              P(features)

The algorithm then makes the 'naive' assumption that all features are
independent, given the label:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                                         P(features)

Rather than computing P(features) explicitly, the algorithm just
calculates the numerator for each label, and normalizes them so they
sum to one:

|                       P(label) * P(f1|label) * ... * P(fn|label)
|  P(label|features) = --------------------------------------------
|                        SUM[l]( P(l) * P(f1|l) * ... * P(fn|l) )
    )defaultdict)ClassifierI)DictionaryProbDistELEProbDistFreqDistsum_logsc                   L    e Zd ZdZd Zd Zd Zd Zd
dZddZ	e
efd       Zy	)NaiveBayesClassifiera  
    A Naive Bayes classifier.  Naive Bayes classifiers are
    paramaterized by two probability distributions:

      - P(label) gives the probability that an input will receive each
        label, given no information about the input's features.

      - P(fname=fval|label) gives the probability that a given feature
        (fname) will receive a given value (fval), given that the
        label (label).

    If the classifier encounters an input with a feature that has
    never been seen with any label, then rather than assigning a
    probability of 0 to all labels, it will ignore that feature.

    The feature value 'None' is reserved for unseen feature values;
    you generally should not use 'None' as a feature value for one of
    your own features.
    c                 \    || _         || _        t        |j                               | _        y)a=  
        :param label_probdist: P(label), the probability distribution
            over labels.  It is expressed as a ``ProbDistI`` whose
            samples are labels.  I.e., P(label) =
            ``label_probdist.prob(label)``.

        :param feature_probdist: P(fname=fval|label), the probability
            distribution for feature values, given labels.  It is
            expressed as a dictionary whose keys are ``(label, fname)``
            pairs and whose values are ``ProbDistI`` objects over feature
            values.  I.e., P(fname=fval|label) =
            ``feature_probdist[label,fname].prob(fval)``.  If a given
            ``(label,fname)`` is not a key in ``feature_probdist``, then
            it is assumed that the corresponding P(fname=fval|label)
            is 0 for all values of ``fval``.
        N)_label_probdist_feature_probdistlistsamples_labels)selflabel_probdistfeature_probdists      M/var/www/openai/venv/lib/python3.12/site-packages/nltk/classify/naivebayes.py__init__zNaiveBayesClassifier.__init__@   s)    "  .!1N2245    c                     | j                   S N)r   )r   s    r   labelszNaiveBayesClassifier.labelsU   s    ||r   c                 @    | j                  |      j                         S r   )prob_classifymax)r   
featuresets     r   classifyzNaiveBayesClassifier.classifyX   s    !!*-1133r   c                 "   |j                         }t        |j                               D ](  }| j                  D ]  }||f| j                  v s % ||= * i }| j                  D ]   }| j
                  j                  |      ||<   " | j                  D ]n  }|j                         D ]Y  \  }}||f| j                  v r.| j                  ||f   }||xx   |j                  |      z  cc<   D||xx   t        g       z  cc<   [ p t        |dd      S )NT)	normalizelog)
copyr   keysr   r   r   logprobitemsr   r   )r   r   fnamelabelr$   fvalfeature_probss          r   r   z"NaiveBayesClassifier.prob_classify[   s     __&
*//+,E5>T%;%;; &
 u% - \\E!1199%@GEN " \\E)//1t5>T%;%;;$($:$:5%<$HMENm&;&;D&AAN
 ENhrl2N  2 " "'TtDDr   c                   	 | j                   t        d       | j                  |      D ]  \  fd	t        fd| j                  D        	fdd      }t        |      dk(  rB|d   }|d	   }|f   j                        dk(  rd
}n0d|f   j                        |f   j                        z  z  }t        ddddd|z  d d ddd|z  d d dd|d
        y )NzMost Informative Featuresc                 0    | f   j                        S r   )prob)lcpdistr&   r(   s    r   	labelprobzFNaiveBayesClassifier.show_most_informative_features.<locals>.labelprob   s    ah',,T22r   c              3   R   K   | ]  }|f   j                         v s|   y wr   )r   ).0r-   r.   r&   r(   s     r   	<genexpr>zFNaiveBayesClassifier.show_most_informative_features.<locals>.<genexpr>   s+     OLqDF1e84D4L4L4N,NLs   ''c                      |        | fS r    )elementr/   s    r   <lambda>zENaiveBayesClassifier.show_most_informative_features.<locals>.<lambda>   s    i&8%8'$Br   T)keyreverse   r   INFz%8.1fz>24z = 14 z%s   z>6z : 6z : 1.0)r   printmost_informative_featuressortedr   lenr,   )
r   nr   l0l1ratior.   r&   r(   r/   s
         @@@@r   show_most_informative_featuresz3NaiveBayesClassifier.show_most_informative_features|   s    '')*99!<KE43 ODLLOBF
 6{aBBb%i %%d+q02u9%**406"e)3D3I3I$3OO $BQ$)Ra%I) =r   c                   	
 t        | d      r| j                  d| S t               }t        t              	t        d       
| j
                  j                         D ]  \  \  }}}|j                         D ]f  }||f}|j                  |       |j                  |      }t        |	|         	|<   t        |
|         
|<   
|   dk(  sV|j                  |       h  t        |	
fd      | _        | j                  d| S )a  
        Return a list of the 'most informative' features used by this
        classifier.  For the purpose of this function, the
        informativeness of a feature ``(fname,fval)`` is equal to the
        highest value of P(fname=fval|label), for any label, divided by
        the lowest value of P(fname=fval|label), for any label:

        |  max[ P(fname=fval|label1) / P(fname=fval|label2) ]
        _most_informative_featuresNc                       y)Ng      ?r4   r4   r   r   r6   z@NaiveBayesClassifier.most_informative_features.<locals>.<lambda>   s    #r   r   c                 f    |    |    z  | d   | d   dv t        | d         j                         fS )Nr   r9   )NFT)strlower)feature_maxprobminprobs    r   r6   z@NaiveBayesClassifier.most_informative_features.<locals>.<lambda>   sC    H%(99QKQK#66$**,	&r   )r7   )hasattrrJ   setr   floatr   r%   r   addr,   r   mindiscardrB   )r   rD   featuresr'   r&   probdistr(   featureprP   rQ   s            @@r   rA   z.NaiveBayesClassifier.most_informative_features   s    456222A66 uH "%(G!+.G,0,B,B,H,H,J($,,.D$dmGLL) d+A'*1gg.>'?GG$'*1gg.>'?GG$w'1, ((1 / -K /5/D+ ..r22r   c                 |   t               }t        t               }t        t              }t               }|D ]a  \  }}||xx   dz  cc<   |j                         D ]<  \  }	}
|||	f   |
xx   dz  cc<   ||	   j	                  |
       |j	                  |	       > c |D ]U  }||   }|D ]I  }	|||	f   j                         }||z
  dkD  s!|||	f   dxx   ||z
  z  cc<   ||	   j	                  d       K W  ||      }i }|j                         D ]%  \  \  }}	} ||t        ||	               }||||	f<   '  | ||      S )z
        :param labeled_featuresets: A list of classified featuresets,
            i.e., a list of tuples ``(featureset, label)``.
        r9   r   N)bins)r   r   rS   r%   rU   NrC   )clslabeled_featuresets	estimatorlabel_freqdistfeature_freqdistfeature_valuesfnamesr   r'   r&   r(   num_samplescountr   r   freqdistrY   s                    r   trainzNaiveBayesClassifier.train   st    "&x0$S) "5J5!Q&!)//1t .t494u%))$/

5!  2 "5 $E(/K(688: &*$UE\248K%<OO8"5)--d3   $ #>2 (8(>(>(@$NUEH N54I0JKH-5UE\* )A >#344r   N)
   )d   )__name__
__module____qualname____doc__r   r   r   r   rH   rA   classmethodr   ri   r4   r   r   r
   r
   +   s?    (6*4EB<)3V 2= .5 .5r   r
   c                  \    ddl m}   | t        j                        }|j	                          y )Nr   )
names_demo)nltk.classify.utilrr   r
   ri   rH   )rr   
classifiers     r   demoru      s"    -0667J--/r   __main__N)ro   collectionsr   nltk.classify.apir   nltk.probabilityr   r   r   r   r
   ru   rl   r4   r   r   <module>rz      s@   2 $ ) P PI5; I5b0 zF r   