
    gT*                     ~    d dl Z d dlmZ  G d d      Z G d de      Zd Zd Zd	 Z ed
ddg      Z	 G d d      Z
y)    N)
namedtuplec                   t    e Zd ZdZddZed        Zed        Zd Zd Z	 eee	      Z
d Zd	 Zd
 Zd Zd Zy)AlignedSenta#  
    Return an aligned sentence object, which encapsulates two sentences
    along with an ``Alignment`` between them.

    Typically used in machine translation to represent a sentence and
    its translation.

        >>> from nltk.translate import AlignedSent, Alignment
        >>> algnsent = AlignedSent(['klein', 'ist', 'das', 'Haus'],
        ...     ['the', 'house', 'is', 'small'], Alignment.fromstring('0-3 1-2 2-0 3-1'))
        >>> algnsent.words
        ['klein', 'ist', 'das', 'Haus']
        >>> algnsent.mots
        ['the', 'house', 'is', 'small']
        >>> algnsent.alignment
        Alignment([(0, 3), (1, 2), (2, 0), (3, 1)])
        >>> from nltk.corpus import comtrans
        >>> print(comtrans.aligned_sents()[54])
        <AlignedSent: 'Weshalb also sollten...' -> 'So why should EU arm...'>
        >>> print(comtrans.aligned_sents()[54].alignment)
        0-0 0-1 1-0 2-2 3-4 3-5 4-7 5-8 6-3 7-9 8-9 9-10 9-11 10-12 11-6 12-6 13-13

    :param words: Words in the target language sentence
    :type words: list(str)
    :param mots: Words in the source language sentence
    :type mots: list(str)
    :param alignment: Word-level alignments between ``words`` and ``mots``.
        Each alignment is represented as a 2-tuple (words_index, mots_index).
    :type alignment: Alignment
    Nc                 z    || _         || _        |t        g       | _        y t	        |      t        u sJ || _        y N)_words_mots	Alignment	alignmenttype)selfwordsmotsr   s       G/var/www/openai/venv/lib/python3.12/site-packages/nltk/translate/api.py__init__zAlignedSent.__init__/   s:    
&r]DN	?i///&DN    c                     | j                   S r   )r   r   s    r   r   zAlignedSent.words8   s    {{r   c                     | j                   S r   )r	   r   s    r   r   zAlignedSent.mots<   s    zzr   c                     | j                   S r   )
_alignmentr   s    r   _get_alignmentzAlignedSent._get_alignment@   s    r   c                 x    t        t        | j                        t        | j                        |       || _        y r   )_check_alignmentlenr   r   r   )r   r   s     r   _set_alignmentzAlignedSent._set_alignmentC   s%    TZZ#dii.)D#r   c                     ddj                  d | j                  D              z  }ddj                  d | j                  D              z  }d| d| d| j                  dS )z_
        Return a string representation for this ``AlignedSent``.

        :rtype: str
        z[%s]z, c              3   &   K   | ]	  }d |z    ywz'%s'N .0ws     r   	<genexpr>z'AlignedSent.__repr__.<locals>.<genexpr>O   s     #D1FQJ   c              3   &   K   | ]	  }d |z    ywr   r    r!   s     r   r$   z'AlignedSent.__repr__.<locals>.<genexpr>P   s     "Bz!6A:zr%   zAlignedSent())joinr   r	   r   )r   r   r   s      r   __repr__zAlignedSent.__repr__I   s^     $))#D#DDE"Btzz"BBCeWBtfBt.ACCr   c                    d}|dz  }|dj                  | j                  D cg c]  }d| d| d c}      z  }|dj                  | j                  D cg c]  }d| d| d c}      z  }|dj                  | j                  D cg c](  \  }}d| j                  |    d| j                  |    d	* c}}      z  }t	        t        | j                        d
z
        D ]4  }|dj                  | j                  |   | j                  |d
z            z  }6 t	        t        | j                        d
z
        D ]4  }|dj                  | j                  |   | j                  |d
z            z  }6 |ddj                  d | j                  D              z  z  }|ddj                  d | j                  D              z  z  }|dz  }|S c c}w c c}w c c}}w )z<
        Dot representation of the aligned sentence
        zgraph align {
znode[shape=plaintext]
 "z_source" [label="z"] 
z_target" [label="z_source" -- "z
_target" 
   z)"{}_source" -- "{}_source" [style=invis]
z)"{}_target" -- "{}_target" [style=invis]
z{rank = same; %s}
 c              3   &   K   | ]	  }d |z    yw)z"%s_source"Nr    r!   s     r   r$   z&AlignedSent._to_dot.<locals>.<genexpr>v   s     .V+Q}q/@+r%   c              3   &   K   | ]	  }d |z    yw)z"%s_target"Nr    r!   s     r   r$   z&AlignedSent._to_dot.<locals>.<genexpr>w   s     .U*Q}q/@*r%   })r(   r   r	   r   ranger   format)r   sr#   uvis         r   _to_dotzAlignedSent._to_dotT   s    	&& 	
RWWL1!-aS6LMM	RWW

K
1!-aS6
KLL 	
RWW !OO+DAq DKKN#=A{K+
 	
 s4;;'!+,A=DDAAE" A - s4::*+A=DD

1

1q5! A , 	
"chh.V$++.V&VWW	"chh.U$**.U&UVV	S? MKs   G
G
	-Gc                 Z   | j                         j                  d      }d}	 t        j                  dd|z  gt        j                  t        j                  t        j                        }|j                  |      \  }}|j                  d      S # t
        $ r}t        d      |d}~ww xY w)zR
        Ipython magic : show SVG representation of this ``AlignedSent``.
        utf8svgdotz-T%s)stdinstdoutstderrz0Cannot find the dot binary from Graphviz packageN)	r8   encode
subprocessPopenPIPEOSError	Exceptioncommunicatedecode)r   
dot_stringoutput_formatprocesseouterrs          r   
_repr_svg_zAlignedSent._repr_svg_}   s     \\^**62
	W &&./ oo!!	G &&z2Szz&!!	  	WNOUVV	Ws   AB 	B*B%%B*c                     dj                  | j                        dd dz   }dj                  | j                        dd dz   }d| d| dS )zn
        Return a human-readable string representation for this ``AlignedSent``.

        :rtype: str
        r.   N   z...z<AlignedSent: 'z' -> 'z'>)r(   r   r	   )r   sourcetargets      r   __str__zAlignedSent.__str__   sU     $++&s+e3$**%cr*U2 vhb99r   c                 t    t        | j                  | j                  | j                  j	                               S )zm
        Return the aligned sentence pair, reversing the directionality

        :rtype: AlignedSent
        )r   r	   r   r   invertr   s    r   rU   zAlignedSent.invert   s(     4::t{{DOO4J4J4LMMr   r   )__name__
__module____qualname____doc__r   propertyr   r   r   r   r   r)   r8   rN   rS   rU   r    r   r   r   r      sh    >'    $ 8I	D'R"&:Nr   r   c                   L    e Zd ZdZd Zed        Zd Zd ZddZ	d Z
d	 Zd
 Zy)r
   ac  
    A storage class for representing alignment between two sequences, s1, s2.
    In general, an alignment is a set of tuples of the form (i, j, ...)
    representing an alignment between the i-th element of s1 and the
    j-th element of s2.  Tuples are extensible (they might contain
    additional data, such as a boolean to indicate sure vs possible alignments).

        >>> from nltk.translate import Alignment
        >>> a = Alignment([(0, 0), (0, 1), (1, 2), (2, 2)])
        >>> a.invert()
        Alignment([(0, 0), (1, 0), (2, 1), (2, 2)])
        >>> print(a.invert())
        0-0 1-0 2-1 2-2
        >>> a[0]
        [(0, 1), (0, 0)]
        >>> a.invert()[2]
        [(2, 1), (2, 2)]
        >>> b = Alignment([(0, 0), (0, 1)])
        >>> b.issubset(a)
        True
        >>> c = Alignment.fromstring('0-0 0-1')
        >>> b == c
        True
    c                     t         j                  | |      }|t        g       k7  rt        d |D              nd|_        d |_        |S )Nc              3   &   K   | ]	  }|d      ywr   Nr    r"   ps     r   r$   z$Alignment.__new__.<locals>.<genexpr>   s     +d!dr%   r   )	frozenset__new__max_len_index)clspairsr   s      r   rb   zAlignment.__new__   s?      e,/3y}/DC+d++!	r   c                 l    t        |j                         D cg c]  }t        |       c}      S c c}w )a  
        Read a giza-formatted string and return an Alignment object.

            >>> Alignment.fromstring('0-0 2-1 9-2 21-3 10-4 7-5')
            Alignment([(0, 0), (2, 1), (7, 5), (9, 2), (10, 4), (21, 3)])

        :type s: str
        :param s: the positional alignments in giza format
        :rtype: Alignment
        :return: An Alignment object corresponding to the string representation ``s``.
        )r
   split
_giza2pair)rf   r4   as      r   
fromstringzAlignment.fromstring   s+     ;A*Q-;<<;s   1c                 p    | j                   s| j                          | j                   j                  |      S )zN
        Look up the alignments that map from a given index or slice.
        )re   _build_index__getitem__)r   keys     r   ro   zAlignment.__getitem__   s,     {{{{&&s++r   c                 &    t        d | D              S )zI
        Return an Alignment object, being the inverted mapping.
        c              3   <   K   | ]  }|d    |d   f|dd z     yw)r-   r      Nr    r_   s     r   r$   z#Alignment.invert.<locals>.<genexpr>   s(     >A1Q41,12.s   )r
   r   s    r   rU   zAlignment.invert   s     >>>>r   Nc                    t               }| j                  s| j                          |s't        t	        t        | j                                    }|D ]'  }|j                  d | j                  |   D               ) t        |      S )z
        Work out the range of the mapping from the given positions.
        If no positions are specified, compute the range of the entire mapping.
        c              3   &   K   | ]	  \  }}|  y wr   r    )r"   _fs      r   r$   z"Alignment.range.<locals>.<genexpr>   s     6~tq!~r%   )setre   rn   listr2   r   updatesorted)r   	positionsimager`   s       r   r2   zAlignment.range   sg    
 {{U3t{{#345IALL6t{{1~66 e}r   c                     dt        |       z  S )M
        Produce a Giza-formatted string representing the alignment.
        zAlignment(%r))r{   r   s    r   r)   zAlignment.__repr__   s     --r   c                 D    dj                  d t        |       D              S )r   r.   c              3   ,   K   | ]  }d |dd z    yw)z%d-%dNrs   r    r_   s     r   r$   z$Alignment.__str__.<locals>.<genexpr>   s     >A!BQ%s   )r(   r{   r   s    r   rS   zAlignment.__str__   s     xx>>>>r   c                     t        | j                  dz         D cg c]  }g  c}| _        | D ]#  }| j                  |d      j                  |       % yc c}w )z
        Build a list self._index such that self._index[i] is a list
        of the alignments originating from word i.
        r-   r   N)r2   rd   re   append)r   rv   r`   s      r   rn   zAlignment._build_index   sS    
 $)Q#78#7ar#78AKK!$$Q'  9s   	Ar   )rV   rW   rX   rY   rb   classmethodrl   ro   rU   r2   r)   rS   rn   r    r   r   r
   r
      s>    2 = =,?.?(r   r
   c                 V    | j                  d      \  }}t        |      t        |      fS N-ri   int)pair_stringr7   js      r   rj   rj     s(    S!DAqq63q6>r   c                 X    | j                  d      \  }}}t        |      t        |      fS r   r   )r   r7   r   r`   s       r   _naacl2pairr   
  s*    $GAq!q63q6>r   c                      t        |      t        u sJ t         fd|D              st        d      t        fd|D              st        d      y)ab  
    Check whether the alignments are legal.

    :param num_words: the number of source language words
    :type num_words: int
    :param num_mots: the number of target language words
    :type num_mots: int
    :param alignment: alignment to be checked
    :type alignment: Alignment
    :raise IndexError: if alignment falls outside the sentence
    c              3   D   K   | ]  }d |d    cxk  xr k  nc   ywr^   r    )r"   pair	num_wordss     r   r$   z#_check_alignment.<locals>.<genexpr>  s#     >IDqDG'i''Is    z&Alignment is outside boundary of wordsc              3   V   K   | ]   }|d    du xs d|d    cxk  xr k  nc  " yw)r-   Nr   r    )r"   r   num_motss     r   r$   z#_check_alignment.<locals>.<genexpr>   s0     PidtAw$9!tAw"9"99is   &)z%Alignment is outside boundary of motsN)r   r
   all
IndexError)r   r   r   s   `` r   r   r     sQ     	?i'''>I>>ABBPiPP@AA Qr   PhraseTableEntry
trg_phraselog_probc                   (    e Zd ZdZd Zd Zd Zd Zy)PhraseTablezs
    In-memory store of translations for a given phrase, and the log
    probability of the those translations
    c                 "    t               | _        y r   )dictsrc_phrasesr   s    r   r   zPhraseTable.__init__-  s    6r   c                      | j                   |   S )a  
        Get the translations for a source language phrase

        :param src_phrase: Source language phrase of interest
        :type src_phrase: tuple(str)

        :return: A list of target language phrases that are translations
            of ``src_phrase``, ordered in decreasing order of
            likelihood. Each list element is a tuple of the target
            phrase and its log probability.
        :rtype: list(PhraseTableEntry)
        r   r   
src_phrases     r   translations_forzPhraseTable.translations_for0  s     
++r   c                     t        ||      }|| j                  vrg | j                  |<   | j                  |   j                  |       | j                  |   j                  d d       y)z
        :type src_phrase: tuple(str)
        :type trg_phrase: tuple(str)

        :param log_prob: Log probability that given ``src_phrase``,
            ``trg_phrase`` is its translation
        :type log_prob: float
        )r   r   c                     | j                   S r   )r   )rK   s    r   <lambda>z!PhraseTable.add.<locals>.<lambda>L  s    

r   T)rp   reverseN)r   r   r   sort)r   r   r   r   entrys        r   addzPhraseTable.add?  sf     !JJT---+-DZ($++E2$)).BD)Qr   c                     || j                   v S r   r   r   s     r   __contains__zPhraseTable.__contains__N  s    T----r   N)rV   rW   rX   rY   r   r   r   r   r    r   r   r   r   '  s    
",R.r   r   )rA   collectionsr   r   ra   r
   rj   r   r   r   r   r    r   r   <module>r      sZ     "QN QNh_(	 _(D

B* 0<2LM (. (.r   