
    gw                     n    	 d dl Z d dlmZmZmZ  G d de      Zd Zedk(  r e        yy# e$ r Y -w xY w)    N)
DendrogramVectorSpaceClusterercosine_distancec                   L    e Zd ZdZddZddZddZd Zd Zd Z	d	 Z
d
 Zd Zy)GAAClustereraM  
    The Group Average Agglomerative starts with each of the N vectors as singleton
    clusters. It then iteratively merges pairs of clusters which have the
    closest centroids.  This continues until there is only one cluster. The
    order of merges gives rise to a dendrogram: a tree with the earlier merges
    lower than later merges. The membership of a given number of clusters c, 1
    <= c <= N, can be found by cutting the dendrogram at depth c.

    This clusterer uses the cosine similarity metric only, which allows for
    efficient speed-up in the clustering process.
    Nc                 \    t        j                  | ||       || _        d | _        d | _        y N)r   __init___num_clusters_dendrogram_groups_values)selfnum_clusters	normalisesvd_dimensionss       F/var/www/openai/venv/lib/python3.12/site-packages/nltk/cluster/gaac.pyr
   zGAAClusterer.__init__   s,    %%dI~F)"    c           	          t        |D cg c]&  }t        j                  |t        j                        ( c}      | _        t        j                  | |||      S c c}w r	   )r   numpyarrayfloat64r   r   cluster)r   vectorsassign_clusterstracevectors        r   r   zGAAClusterer.cluster#   sO    %>EFgFU[[/gF
 $++D'?ERR Gs   +Ac                 R   t        |      }dg|z  }|}t        j                  |      }||f}t        j                  |t              t        j
                  z  }t        |      D ]-  }	t        |	dz   |      D ]  }
t        ||	   ||
         ||	|
f<    / |t        | j                  d      kD  rt        j                  |j                         |      \  }	}
|rt        d|	|
fz         | j                  |||	|
       t        j
                  |d d |
f<   t        j
                  ||
d d f<   ||	   ||
   z   ||	<   | j                  j                  ||	   ||
          |dz  }||
dz   d xxx dz  ccc |||
<   |t        | j                  d      kD  r| j!                  | j                         y )N   )dtypezmerging %d and %d)lenr   arangeonesfloatinfranger   maxr   unravel_indexargminprint_merge_similaritiesr   mergeupdate_clusters)r   r   r   Ncluster_lencluster_count	index_mapdimsdistijs              r   cluster_vectorspacez GAAClusterer.cluster_vectorspace*   s   LcAgLLO	 1vzz$e,uyy8qA1q5!_,WQZDQT
 %  c$"4"4a88&&t{{}d;DAq)QF23 $$T;1= DAJDAJ )^k!n<KN""9Q<1>QM a!eg!#IaL) c$"4"4a88, 	T//0r   c                 P   ||   }||   }||z   }|d ||f   |z  |d ||f   |z  z   |d ||f<   |d ||fxx   |z  cc<   |||dz   |f   |z  ||dz   ||f   |z  z   |||dz   |f<   |||dz   d f   |z  |||dz   d f   |z  z   |||dz   d f<   |||dz   d fxx   |z  cc<   y )Nr    )r   r2   r.   r3   r4   i_weightj_weight
weight_sums           r   r*   z GAAClusterer._merge_similaritiesP   s    q>q>(
 2A2q5kH,tBQBE{X/EERaRURaRUz! AEAI)DQA,>,II 	QA	\  1q57
+h6aQj9IH9TTQAZQAZJ&r   c                    | j                   j                  |      }g | _        |D ]  }t        |      dkD  sJ | j                  r| j                  |d         }nt        j                  |d         }|dd  D ](  }| j                  r|| j                  |      z  }$||z  }* |t        |      z  }| j                  j                  |        t        | j                        | _	        y Nr   r   )
r   groups
_centroidsr    _should_normalise
_normaliser   r   appendr   )r   r   clustersr   centroidr   s         r   r,   zGAAClusterer.update_clustersc   s    ##**<8Gw<!###%%??71:6 ;;wqz2!!"+)) 77H&H	 &
 G$HOO""8,   !1r   c                     d }t        | j                        D ],  }| j                  |   }t        ||      }|r	||d   k  s)||f}. |d   S r<   )r%   r   r>   r   )r   r   bestr3   rC   r2   s         r   classify_vectorspacez!GAAClusterer.classify_vectorspaceu   sW    t))*Aq)H"684D4$q'>ay	 +
 Awr   c                     | j                   S )zi
        :return: The dendrogram representing the current clustering
        :rtype:  Dendrogram
        )r   r   s    r   
dendrogramzGAAClusterer.dendrogram~   s    
 r   c                     | j                   S r	   r   rH   s    r   r   zGAAClusterer.num_clusters   s    !!!r   c                      d| j                   z  S )Nz*<GroupAverageAgglomerative Clusterer n=%d>rK   rH   s    r   __repr__zGAAClusterer.__repr__   s    ;d>P>PPPr   )r   TN)FF)F)__name__
__module____qualname____doc__r
   r   r5   r*   r,   rF   rI   r   rM   r7   r   r   r   r      s7    
#S$1L'&2$ "Qr   r   c                     ddl m}  ddgddgddgddgddgddgfD cg c]  }t        j                  |       }} | d      }|j	                  |d      }t        d|       t        d	|       t        d
|       t                |j                         j                          t        j                  ddg      }t        d|z  d       t        |j                  |             t                yc c}w )zO
    Non-interactive demonstration of the clusterers with simple 2-D data.
    r   )r      r         Tz
Clusterer:z
Clustered:zAs:zclassify(%s): )endN)	nltk.clusterr   r   r   r   r)   rI   showclassify)r   fr   	clustererrB   r   s         r   demor]      s    
 * *+AAAAAQRTUPV'WX'W!u{{1~'WGX QI  $/H	,	"	, 	%	G ! [[!Q F	/F
",	)

V
$%	G% Ys   C-__main__)	r   ImportErrornltk.cluster.utilr   r   r   r   r]   rN   r7   r   r   <module>ra      sV   	 P OyQ' yQx: zF   		s   , 44