
    g&                         d dl Z d dlmZ d dlmZ d dlmZ 	 d dlZd dl	m
Z
  G d de
      Zd Zd	 Z G d
 d      Z G d d      Zy# e$ r Y 3w xY w)    N)abstractmethod)sqrt)stdout)ClusterIc                   ^    e Zd ZdZddZddZed        Zd Zed        Z	d Z
d	 Zd
 Zd Zy)VectorSpaceClustererz
    Abstract clusterer which takes tokens and maps them into a vector space.
    Optionally performs singular value decomposition to reduce the
    dimensionality.
    Nc                 .    d| _         || _        || _        y)a)  
        :param normalise:       should vectors be normalised to length 1
        :type normalise:        boolean
        :param svd_dimensions:  number of dimensions to use in reducing vector
                                dimensionsionality with SVD
        :type svd_dimensions:   int
        N)_Tt_should_normalise_svd_dimensions)self	normalisesvd_dimensionss      F/var/www/openai/venv/lib/python3.12/site-packages/nltk/cluster/util.py__init__zVectorSpaceClusterer.__init__   s     !*-    c                 "   t        |      dkD  sJ | j                  rt        t        | j                  |            }| j
                  r| j
                  t        |d         k  rt        j                  j                  t        j                  t        j                  |                  \  }}}|d | j
                   t        j                  | j
                  t        j                        z  }|d d d | j
                  f   }|d | j
                  d d f   }	t        j                  t        j                  ||	            }t        j                  |      | _        | j                  ||       |r|D 
cg c]  }
| j!                  |
       c}
S y c c}
w Nr   )lenr   listmap
_normaliser   numpylinalgsvd	transposearrayidentityfloat64dotr
   cluster_vectorspaceclassify)r   vectorsassign_clusterstraceudvtSTDtvectors              r   clusterzVectorSpaceClusterer.cluster(   sP   7|a !!3t89G D$8$83wqz?$J))%//%++g:N*OPJQ2(D(()ENN$$emm- A !+t++++,A*d***A-.Booeii2&67Gq)DH 	  %0 8?@fDMM&)@@ @s   0Fc                      y)zD
        Finds the clusters using the given set of vectors.
        N )r   r#   r%   s      r   r!   z(VectorSpaceClusterer.cluster_vectorspaceA       r   c                     | j                   r| j                  |      }| j                   t        j                  | j                  |      }| j                  |      }| j                  |      S N)r   r   r
   r   r    classify_vectorspacecluster_name)r   r,   r-   s      r   r"   zVectorSpaceClusterer.classifyG   sX    !!__V,F88YYtxx0F++F3  ))r   c                      y)zN
        Returns the index of the appropriate cluster for the vector.
        Nr/   r   r,   s     r   r3   z)VectorSpaceClusterer.classify_vectorspaceO   r0   r   c                     | j                   r| j                  |      }| j                   t        j                  | j                  |      }| j                  ||      S r2   )r   r   r
   r   r    likelihood_vectorspace)r   r,   labels      r   
likelihoodzVectorSpaceClusterer.likelihoodU   sJ    !!__V,F88YYtxx0F**6599r   c                 6    | j                  |      }||k(  rdS dS )zP
        Returns the likelihood of the vector belonging to the cluster.
        g      ?g        )r3   )r   r,   r-   	predicteds       r   r8   z+VectorSpaceClusterer.likelihood_vectorspace\   s&     --f5	*s33r   c                     | j                   r| j                  |      }| j                   t        j                  | j                  |      }|S )zU
        Returns the vector after normalisation and dimensionality reduction
        )r   r   r
   r   r    r6   s     r   r,   zVectorSpaceClusterer.vectorc   s>     !!__V,F88YYtxx0Fr   c                 F    |t        t        j                  ||            z  S )z7
        Normalises the vector to unit length.
        r   r   r    r6   s     r   r   zVectorSpaceClusterer._normalisem   s     UYYvv6777r   )FN)FF)__name__
__module____qualname____doc__r   r-   r   r!   r"   r3   r:   r8   r,   r   r/   r   r   r   r      sS    
.A2  
*  
:48r   r   c                 J    | |z
  }t        t        j                  ||            S )z}
    Returns the euclidean distance between vectors u and v. This is equivalent
    to the length of the vector (u - v).
    r?   )r&   vdiffs      r   euclidean_distancerG   t   s#    
 q5D		$%&&r   c           	          dt        j                  | |      t        t        j                  | |             t        t        j                  ||            z  z  z
  S )zw
    Returns 1 minus the cosine of the angle between vectors v and u. This is
    equal to ``1 - (u.v / |u||v|)``.
       )r   r    r   )r&   rE   s     r   cosine_distancerJ   }   sA    
 		!Q4		!Q#84		!Q;P#PQRRr   c                   *    e Zd ZdZd ZddZd Zd Zy)_DendrogramNodezTree node of a dendrogram.c                      || _         || _        y r2   )_value	_children)r   valuechildrens      r   r   z_DendrogramNode.__init__   s    !r   c                     | j                   r5g }| j                   D ]"  }|j                  |j                  |             $ |S |r| j                  gS | gS r2   )rO   extendleavesrN   )r   valuesrT   childs       r   rT   z_DendrogramNode.leaves   sL    >>Fell623 (MKK= 6Mr   c                    | j                   | fg}t        |      |k  r|j                         \  }}|j                  s|j	                  ||f       nm|j                  D ]?  }|j                  r|j                  |j                   |f       -|j                  d|f       A |j                          t        |      |k  rg }|D ]$  \  }}|j                  |j                                & |S r   )rN   r   poprO   pushappendsortrT   )r   nqueueprioritynoderV   groupss          r   r`   z_DendrogramNode.groups   s    ++t$%%j1n"YY[NHd>>

Hd+,??LL%,,!67LL!U,	 ( JJL %j1n #NHdMM$++-( $r   c                 H    t        | j                  |j                        dk  S r   )rJ   rN   )r   
comparators     r   __lt__z_DendrogramNode.__lt__   s    t{{J,=,=>BBr   N)T)r@   rA   rB   rC   r   rT   r`   rc   r/   r   r   rL   rL      s    $"	*Cr   rL   c                   6    e Zd ZdZg fdZd Zd Zg fdZd Zy)
Dendrograma  
    Represents a dendrogram, a tree with a specified branching order.  This
    must be initialised with the leaf items, then iteratively call merge for
    each branch. This class constructs a tree representing the order of calls
    to the merge function.
    c                     |D cg c]  }t        |       c}| _        t        j                  | j                        | _        d| _        yc c}w )zs
        :param  items: the items at the leaves of the dendrogram
        :type   items: sequence of (any)
        rI   N)rL   _itemscopy_original_items_merge)r   itemsitems      r   r   zDendrogram.__init__   sA    
 :??t,?#yy5 @s   Ac                      t        |      dk\  sJ t         j                  g fd|D         } xj                  dz  c_        | j                  |d   <   |dd D ]  } j                  |=  y)a=  
        Merges nodes at given indices in the dendrogram. The nodes will be
        combined which then replaces the first node specified. All other nodes
        involved in the merge will be removed.

        :param  indices: indices of the items to merge (at least two)
        :type   indices: seq of int
           c              3   <   K   | ]  }j                   |     y wr2   )rg   ).0ir   s     r   	<genexpr>z#Dendrogram.merge.<locals>.<genexpr>   s     -Ngdkk!ngs   rI   r   N)r   rL   rj   rg   )r   indicesr_   rq   s   `   r   mergezDendrogram.merge   sk     7|q   t{{O-Ng-NOq"&GAJAA r   c                     t        | j                        dkD  r!t        | j                  g| j                   }n| j                  d   }|j	                  |      S )z
        Finds the n-groups of items (leaves) reachable from a cut at depth n.
        :param  n: number of groups
        :type   n: int
        rI   r   )r   rg   rL   rj   r`   )r   r\   roots      r   r`   zDendrogram.groups   sG     t{{a"4;;==D;;q>D{{1~r   c                    d\  }}}t        | j                        dkD  r!t        | j                  g| j                   }n| j                  d   }| j                  }|r|}n|D cg c]  }d|j
                  z   }}t        t        t         |            dz   dz  t        z
  dz
        dfd	}	d }
|j
                  |fg}|D cg c]
  } |	d       }}|rj|j                         \  }}t        t        d	 |j                              }t        t        |j                  |            }|rt        |      }t        |      }t        t        |            D ]{  }||   |v rH|k(  r |
 |	|d|             n&|k(  r |
 |	||d             n |
 |	|||              |	|      ||<   R|cxk  rk  rn n |
 |	|||             q |
||          }  |
d
       |j                  D ],  }|j                  s|j                  |j
                  |f       . |j!                          |D ]
  } |
|         |
d
       |rj |
dj#                  fd|D                      |
d
       yc c}w c c}w )z
        Print the dendrogram in ASCII art to standard out.

        :param leaf_labels: an optional list of strings to use for labeling the
                            leaves
        :type leaf_labels: list
        )+-|rI   r   z%srn    c                      |z   |  |z   S r2   r/   )centreleftrightlhalfrhalfs      r   formatzDendrogram.show.<locals>.format   s    dl^F8EEM?;;r   c                 .    t        j                  |        y r2   )r   write)strs    r   displayz Dendrogram.show.<locals>.display   s    LLr   c                 *    | j                  d      d   S )NFr   )rT   )cs    r   <lambda>z!Dendrogram.show.<locals>.<lambda>  s    %1Cr   
 c              3   @   K   | ]  }|j                          y wr2   )center)rp   rl   widths     r   rr   z"Dendrogram.show.<locals>.<genexpr>#  s     @xtE*xs   N)r{   r{   )r   rg   rL   rj   ri   rN   maxr   intrX   r   rO   indexminrangerZ   r[   join)r   leaf_labelsJOINHLINKVLINKrv   rT   last_rowleafr   r   r]   	verticalsr^   r_   child_left_leafrs   min_idxmax_idxrq   rV   verticalr   r   r   s                         @@@r   showzDendrogram.show   sQ    +eU t{{a"4;;==D;;q>D%%"H7=>vtt{{*vH> CX&'!+
EEMA%&	<	 ++t$%-34VTVC[V	4"YY[NHd"3'CT^^#TUO3v||_=>Gg,g,3v;'!9/G|tS% 89gtUC 89tUE :;#)%=IaL,W,F5%78IaL) ( DM??LL%,,!67 ( JJL%! &DM9 > 	@x@@Ac ?  5s   'I5I:c                     t        | j                        dkD  r!t        | j                  g| j                   }n| j                  d   }|j	                  d      }dt        |      z  S )NrI   r   Fz<Dendrogram with %d leaves>)r   rg   rL   rj   rT   )r   rv   rT   s      r   __repr__zDendrogram.__repr__&  sT    t{{a"4;;==D;;q>DU#,s6{::r   N)	r@   rA   rB   rC   r   rt   r`   r   r   r/   r   r   re   re      s,        
  " GR;r   re   )rh   abcr   mathr   sysr   r   ImportErrornltk.cluster.apir   r   rG   rJ   rL   re   r/   r   r   <module>r      sh       	 &\88 \8~'S(C (CV|; |;C  		s   A AA