
    g                      f    	 d dl Z d dlmZ  G d de      Zd Zedk(  r e        yy# e$ r Y )w xY w)    N)VectorSpaceClustererc                   P    e Zd ZdZ	 	 	 	 	 	 ddZd ZddZd Zd Zd Z	d	 Z
d
 Zy)EMClusterera  
    The Gaussian EM clusterer models the vectors as being produced by
    a mixture of k Gaussian sources. The parameters of these sources
    (prior probability, mean and covariance matrix) are then found to
    maximise the likelihood of the given data. This is done with the
    expectation maximisation algorithm. It starts with k arbitrarily
    chosen means, priors and covariance matrices. It then calculates
    the membership probabilities for each vector in each of the
    clusters; this is the 'E' step. The cluster parameters are then
    updated in the 'M' step using the maximum likelihood estimate from
    the cluster membership probabilities. This process continues until
    the likelihood of the data does not significantly increase.
    Nc                     t        j                  | ||       t        j                  |t        j                        | _        t        |      | _        || _        || _	        || _
        || _        y)aL  
        Creates an EM clusterer with the given starting parameters,
        convergence threshold and vector mangling parameters.

        :param  initial_means: the means of the gaussian cluster centers
        :type   initial_means: [seq of] numpy array or seq of SparseArray
        :param  priors: the prior probability for each cluster
        :type   priors: numpy array or seq of float
        :param  covariance_matrices: the covariance matrix for each cluster
        :type   covariance_matrices: [seq of] numpy array
        :param  conv_threshold: maximum change in likelihood before deemed
                    convergent
        :type   conv_threshold: int or float
        :param  bias: variance bias used to ensure non-singular covariance
                      matrices
        :type   bias: float
        :param  normalise:  should vectors be normalised to length 1
        :type   normalise:  boolean
        :param  svd_dimensions: number of dimensions to use in reducing vector
                               dimensionsionality with SVD
        :type   svd_dimensions: int
        N)r   __init__numpyarrayfloat64_meanslen_num_clusters_conv_threshold_covariance_matrices_priors_bias)selfinitial_meanspriorscovariance_matricesconv_thresholdbias	normalisesvd_dimensionss           D/var/www/openai/venv/lib/python3.12/site-packages/nltk/cluster/em.pyr   zEMClusterer.__init__   sX    @ 	%%dI~Fkk-? /-$7!
    c                     | j                   S N)r   r   s    r   num_clusterszEMClusterer.num_clustersG   s    !!!r   c           
      B   t        |      dkD  sJ t        |d         }| j                  }| j                  }|sBt        j                  | j
                  t        j                        | j
                  z  x}| _        | j                  }|sLt        | j
                        D cg c]&  }t        j                  |t        j                        ( c}x}| _        | j                  ||||      }d}	|	s-|rt        d|       t        j                  t        |      | j
                  ft        j                        }
t        t        |            D ]d  }t        | j
                        D ])  }||   | j                  ||   ||   ||         z  |
||f<   + |
|d d fxx   t        |
|d d f         z  cc<   f t        | j
                        D ]  }||   }t        j                  ||ft        j                        }t        j                  |t        j                        }d}t        t        |            D ]R  }||   ||   z
  }||
||f   t        j                  j!                  ||      z  z  }||
||f   z  }||
||f   ||   z  z  }T ||z  ||<   ||z  ||<   |t        |      z  ||<   ||xx   | j"                  t        j                  |t        j                        z  z  cc<    | j                  ||||      }t%        ||z
        | j&                  k  rd}	|}|	s,y y c c}w )Nr   Fziteration; loglikelihood        T)r   r   r   r   onesr   r
   r   rangeidentity_loglikelihoodprintzeros	_gaussiansummultiplyouterr   absr   )r   vectorstrace
dimensionsmeansr   covariancesilastl	convergedhjcovariance_beforenew_covariancenew_meansum_hjdeltals                     r   cluster_vectorspacezEMClusterer.cluster_vectorspaceJ   s   7|a _


4--u}}=@R@RRFT\ // t11272A z5==927 K$3 ##GVUKH	0%8S\4+=+=>NA3w<(t112A$Qi$..a+a.'!*+ AadG 3 !Q$3qAw<' ) 4--.$/N!!&j*-Eu}}!U ;;z5==As7|,A#AJq1E"a1g0D0DUE0R&RRNa1g%F!Q$'!* 44H	 -
 "0&!8A#f,a"S\1q	 A$**u~~j%--/X"XX /$ ##GVUKHA 519~ 4 44 	EI 7s    +Lc                     d }t        | j                        D ]N  }| j                  |   | j                  | j                  |   | j
                  |   |      z  }|r	||d   kD  sK||f}P |d   S )Nr      )r#   r   r   r(   r   r   )r   vectorbestr6   ps        r   classify_vectorspacez EMClusterer.classify_vectorspace   sv    t))*AQ$..A 9 9! <f# A 1tAw;1v + Awr   c                     | j                         j                  |      }| j                  |   | j                  | j                  |   | j
                  |   |      z  S r   )cluster_namesindexr   r(   r   r   )r   r@   clustercids       r   likelihood_vectorspacez"EMClusterer.likelihood_vectorspace   sW      "((1||G$t~~KK $";";G"Df(
 
 	
r   c                    t        |      }|j                  ||fk(  sJ dt        |j                        z         	 t        j                  j                  |      }t        j                  j                  |      }|dz  dt        j                  z  | dz  z  z  }||z
  }t        ||       dt        j                  t        j                  ||      |      z  }	|t        j                  |	      z  S # t        $ r Y yw xY w)Nzbad sized covariance matrix, %sg         g       @r   )r   shapestrr   linalgdetinvpir&   dotexpOverflowError)
r   meancvmxmrO   rP   adxbs
             r   r(   zEMClusterer._gaussian   s    IyyQF"V$ECII$VV"	,,""3'C,,""3'CT	Q\rCx88ATB"cNuyy2s!3R88Auyy|## 	 	s   B3C/ /	C;:C;c           	          d}|D ]W  }d}t        t        |            D ]$  }|||   | j                  ||   ||   |      z  z  }& |t        j                  |      z  }Y |S )Nr!   r   )r#   r   r(   r   log)	r   r-   r   r0   r1   llhr@   rB   r6   s	            r   r%   zEMClusterer._loglikelihood   sj    FA3v;'VAYa+a.&!QQQ (599Q<C	 
 
r   c                 2    dt        | j                        z  S )Nz<EMClusterer means=%s>)listr   r   s    r   __repr__zEMClusterer.__repr__   s    '$t{{*;;;r   )NNgư>皙?FN)F)__name__
__module____qualname____doc__r   r   r=   rC   rI   r(   r%   ra    r   r   r   r      sE    "  &P":x
 <r   r   c                  T   ddl m}  ddgddgddgfD cg c]  }t        j                  |       }}ddgdd	gg}| j	                  |d
      }|j                  |dd      }t        d|       t        d|       t                t        d      D ]c  }t        d|       t        d|j                  |          t        d|j                  |          t        d|j                  |          t                e t        j                  ddg      }t        d|z  d       t        |j                  |             t        j                  ddg      }t        d|z         |j                  |      }|j                         D ]&  }	t        |	 d|j                  |	      dz  dd       ( yc c}w )zO
    Non-interactive demonstration of the clusterers with simple 2-D data.
    r   )rG   g      ?g      ?r?         rK   gGz @rb   )r   T)r.   z
Clustered:z
As:       zCluster:zPrior:  zMean:   zCovar:  zclassify(%s): )endzclassification_probdist(%s):z => d   z.0f%N)nltkrG   r   r	   r   r&   r#   r   r   r   classifyclassification_probdistsamplesprob)
rG   fr-   r0   	clustererclusterscr@   pdistsamples
             r   demorz      s   
  *-c
S#JA'GH'G!u{{1~'GGHVaYE##E#4I  $d ;H	, 	,!	G1Xj!j)++A./j)**1-.j)88;<  [[!Q F	/F
",	)

V
$% [[!Q F	
(6
12--f5E--/UZZ/#5c:!<= "5 Is   F%__main__)r   ImportErrornltk.cluster.utilr   r   rz   rc   rg   r   r   <module>r~      sQ   	 3`<& `<F$>N zF a  		s   ( 00