
    g)"                     P    d Z ddlZddlmZmZ ddlmZ 	 d	dZd Z	 d
dZ		 d	dZ
y)z ChrF score implementation     N)Counterdefaultdict)ngramsc                 (    t        | g|g||||      S )uW  
    Calculates the sentence level CHRF (Character n-gram F-score) described in
     - Maja Popovic. 2015. CHRF: Character n-gram F-score for Automatic MT Evaluation.
       In Proceedings of the 10th Workshop on Machine Translation.
       https://www.statmt.org/wmt15/pdf/WMT49.pdf
     - Maja Popovic. 2016. CHRF Deconstructed: β Parameters and n-gram Weights.
       In Proceedings of the 1st Conference on Machine Translation.
       https://www.statmt.org/wmt16/pdf/W16-2341.pdf

    This implementation of CHRF only supports a single reference at the moment.

    For details not reported in the paper, consult Maja Popovic's original
    implementation: https://github.com/m-popovic/chrF

    The code should output results equivalent to running CHRF++ with the
    following options: -nw 0 -b 3

    An example from the original BLEU paper
    https://www.aclweb.org/anthology/P02-1040.pdf

        >>> ref1 = str('It is a guide to action that ensures that the military '
        ...            'will forever heed Party commands').split()
        >>> hyp1 = str('It is a guide to action which ensures that the military '
        ...            'always obeys the commands of the party').split()
        >>> hyp2 = str('It is to insure the troops forever hearing the activity '
        ...            'guidebook that party direct').split()
        >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS
        0.6349...
        >>> sentence_chrf(ref1, hyp2) # doctest: +ELLIPSIS
        0.3330...

    The infamous "the the the ... " example

        >>> ref = 'the cat is on the mat'.split()
        >>> hyp = 'the the the the the the the'.split()
        >>> sentence_chrf(ref, hyp)  # doctest: +ELLIPSIS
        0.1468...

    An example to show that this function allows users to use strings instead of
    tokens, i.e. list(str) as inputs.

        >>> ref1 = str('It is a guide to action that ensures that the military '
        ...            'will forever heed Party commands')
        >>> hyp1 = str('It is a guide to action which ensures that the military '
        ...            'always obeys the commands of the party')
        >>> sentence_chrf(ref1, hyp1) # doctest: +ELLIPSIS
        0.6349...
        >>> type(ref1) == type(hyp1) == str
        True
        >>> sentence_chrf(ref1.split(), hyp1.split()) # doctest: +ELLIPSIS
        0.6349...

    To skip the unigrams and only use 2- to 3-grams:

        >>> sentence_chrf(ref1, hyp1, min_len=2, max_len=3) # doctest: +ELLIPSIS
        0.6617...

    :param references: reference sentence
    :type references: list(str) / str
    :param hypothesis: a hypothesis sentence
    :type hypothesis: list(str) / str
    :param min_len: The minimum order of n-gram this function should extract.
    :type min_len: int
    :param max_len: The maximum order of n-gram this function should extract.
    :type max_len: int
    :param beta: the parameter to assign more importance to recall over precision
    :type beta: float
    :param ignore_whitespace: ignore whitespace characters in scoring
    :type ignore_whitespace: bool
    :return: the sentence level CHRF score.
    :rtype: float
    )betaignore_whitespace)corpus_chrf)	reference
hypothesismin_lenmax_lenr   r   s         N/var/www/openai/venv/lib/python3.12/site-packages/nltk/translate/chrf_score.pysentence_chrfr      s(    V 		+     c                 ~    t        |       t        k7  rdj                  |       } |rt        j                  dd|       } | S )N z\s+ )typestrjoinresub)sentr   s     r   _preprocessr   e   s5    DzSxx~vvfb$'Kr   c                 t   t        t        | |            }t        t        ||            }||z  }t        |j                               }t        |j                               }	t        |j                               }
	 ||	z  }||
z  }|dz  }d|z   ||z  z  ||z  |z   z  }||||fS # t        $ r	 |x}x}}Y w xY w)ax  
    This function computes the precision, recall and fscore from the ngram
    overlaps. It returns the `support` which is the true positive score.

    By underspecifying the input type, the function will be agnostic as to how
    it computes the ngrams and simply take the whichever element in the list;
    it could be either token or character.

    :param reference: The reference sentence.
    :type reference: list
    :param hypothesis: The hypothesis sentence.
    :type hypothesis: list
    :param n: Extract up to the n-th order ngrams
    :type n: int
    :param beta: The parameter to assign more importance to recall over precision.
    :type beta: float
    :param epsilon: The fallback value if the hypothesis or reference is empty.
    :type epsilon: float
    :return: Returns the precision, recall and f-score and support (true positive).
    :rtype: tuple(float)
          )r   r   sumvaluesZeroDivisionError)r
   r   nr   epsilon
ref_ngrams
hyp_ngramsoverlap_ngramstptpfptpfnprecrecfactorfscores                  r   $chrf_precision_recall_fscore_supportr-   o   s    0 	1-.J
A./J  *,N	^""$	%Bz  "#Dz  "#D&Dy4iqf*,0CD fb    &%%%sV&s   <#B% %B76B7c                    t        |       t        |      k(  sJ d       t        |      }t        t              }t        | |      D ]Y  \  }}	t	        ||      }t	        |	|      }	t        ||dz         D ]*  }
t        ||	|
|      \  }}}}||
   j                  |       , [ t        |      }|j                         D 
cg c]  \  }
}t        |       }}
}t        |      |z  |z  S c c}}
w )aq  
    Calculates the corpus level CHRF (Character n-gram F-score), it is the
    macro-averaged value of the sentence/segment level CHRF score.

    This implementation of CHRF only supports a single reference at the moment.

        >>> ref1 = str('It is a guide to action that ensures that the military '
        ...            'will forever heed Party commands').split()
        >>> ref2 = str('It is the guiding principle which guarantees the military '
        ...            'forces always being under the command of the Party').split()
        >>>
        >>> hyp1 = str('It is a guide to action which ensures that the military '
        ...            'always obeys the commands of the party').split()
        >>> hyp2 = str('It is to insure the troops forever hearing the activity '
        ...            'guidebook that party direct')
        >>> corpus_chrf([ref1, ref2, ref1, ref2], [hyp1, hyp2, hyp2, hyp1]) # doctest: +ELLIPSIS
        0.3910...

    :param references: a corpus of list of reference sentences, w.r.t. hypotheses
    :type references: list(list(str))
    :param hypotheses: a list of hypothesis sentences
    :type hypotheses: list(list(str))
    :param min_len: The minimum order of n-gram this function should extract.
    :type min_len: int
    :param max_len: The maximum order of n-gram this function should extract.
    :type max_len: int
    :param beta: the parameter to assign more importance to recall over precision
    :type beta: float
    :param ignore_whitespace: ignore whitespace characters in scoring
    :type ignore_whitespace: bool
    :return: the sentence level CHRF score.
    :rtype: float
    z@The number of hypotheses and their references should be the samer   )r   )
lenr   listzipr   ranger-   appenditemsr   )
references
hypothesesr   r   r   r   	num_sentsngram_fscoresr
   r   r!   r)   r*   r,   r&   num_ngram_sizesfscorestotal_scoress                     r   r	   r	      s   J z?c  JIJ  JI  %M "%Z!<	:	+<=	 ->?
 w!,A$H:qt%!D#vr !##F+ - "= -(O 4A3F3F3HI3HZQCL3HLI /9<< Js   ?C()r            @T)r=   gؗҜ<)__doc__r   collectionsr   r   	nltk.utilr   r   r   r-   r	    r   r   <module>rB      s>    " 	 ,  NRRj 16(!X OSC=r   