
    g{L                     l   d dl Z d dl mZ d dlmZ d dlZd dlmZmZm	Z	m
Z
mZmZmZmZ d dlmZ  ej"                  d      d        Z ej"                  d      d	        Z ej"                  d      d
        Z ej"                  d      d        Zej"                  d        Zej.                  j1                  dddgdfddgd fdddg      d        Zd Zd Zd Zd Zej"                  d        Zej.                  j1                  ddddgdfdddg      d        Zej"                  d        Z ej.                  j1                  dddgd fd!d"d#g      d$        Z!d% Z"ej"                  d&        Z#ej.                  j1                  dddgd fddgd'fdd(dgd fdd(dgd'fg      d)        Z$ej"                  d*        Z%ej.                  j1                  dddgd+fd,d-d.g      d/        Z&d0 Z'd1 Z(ej"                  d2        Z)ej.                  j1                  dd3d4ddd(gd5fdd6d(gd7fdd8d(gd5fg      d9        Z*ej"                  d:        Z+ej.                  j1                  dd;d4ddd(gd<fdd6d(gd=fdd8d(gd<fg      d>        Z,ej"                  d?        Z-ej.                  j1                  dd3d4ddd(gd@fdd6d(gdAfdd8d(gd@fg      dB        Z.ej"                  dC        Z/ej.                  j1                  dd3d4ddd(gdDfdd6d(gdEfdd8d(gdFfg      dG        Z0 ej"                  d      dH        Z1ej.                  j1                  dIdJdKdLdMdNdOdP ejd                  dQej.                  jg                  dRS      T      g      ej.                  j1                  dUg dV ed       W      dX               Z4dY Z5dZ Z6d[ Z7d\ Z8d] Z9d^ Z:d_ Z;y)`    N)fsum)
itemgetter)MLEAbsoluteDiscountingInterpolatedKneserNeyInterpolatedLaplaceLidstoneStupidBackoff
VocabularyWittenBellInterpolated)padded_everygramssession)scopec                       t        g dd      S )N)abcdz<s></s>   )
unk_cutoff)r        R/var/www/openai/venv/lib/python3.12/site-packages/nltk/test/unit/lm/test_models.py
vocabularyr      s    >1MMr   c                      g dg dgS )N)r   r   r   r   )egr   r   r   r   r   r   r   r   training_datar!      s     "@AAr   c           	      R    | D cg c]  }t        t        d|             c}S c c}w )N   listr   r!   sents     r   bigram_training_datar(   $   '    9FGD"1d+,GGG   $c           	      R    | D cg c]  }t        t        d|             c}S c c}w )N   r$   r&   s     r   trigram_training_datar-   )   r)   r*   c                 B    t        d|       }|j                  |       |S Nr#   r   r   fit)r   r(   models      r   mle_bigram_modelr4   .   s     j)E	II"#Lr   zword, context, expected_scorer   r   r   r   )r   Nr   )r   Ng$I$I?)yNg۶m۶m?c                 Z    t        j                  | j                  ||      d      |k(  sJ y N-C6?pytestapproxscore)r4   wordcontextexpected_scores       r   test_mle_bigram_scoresr@   5   s+      ==)//g>EWWWr   c                 T    t        j                  | j                  ddg            sJ y )Nr   r   )mathisinflogscore)r4   s    r   'test_mle_bigram_logscore_for_zero_scorerE   H   s$    ::&//cU;<<<r   c                     g d}d}d}t        j                  | j                  |      d      |k(  sJ t        j                  | j                  |      d      |k(  sJ y )N)r   r   )r   r   )r   <UNK>)rH   r   )r   r   r   r   g(\?g_vO@r8   r:   r;   entropy
perplexity)r4   trainedHrL   s       r   'test_mle_bigram_entropy_perplexity_seenrO   L   s_    G" 	AJ==)11':DAQFFF==)44W=tD
RRRr   c                     g d}t        j                  | j                  |            sJ t        j                  | j                  |            sJ y )N)rG   r   r   )r   r   rI   )rB   rC   rK   rL   )r4   	untraineds     r   )test_mle_bigram_entropy_perplexity_unseenrS   e   sC    EI::&..y9:::::&11)<===r   c                     d}d}g d}t        j                  | j                  |      d      |k(  sJ t        j                  | j                  |      d      |k(  sJ y )Ng~jt@gs @)r   r   r   )-r   rW   )r   r8   rJ   )r4   rN   rL   texts       r   +test_mle_bigram_entropy_perplexity_unigramsr[   m   s]     	AJHD==)11$7>!CCC==)44T:DAZOOOr   c                 B    t        d|      }|j                  |        |S Nr,   orderr   r1   r-   r   r3   s      r   mle_trigram_modelra      s     aJ/E	II#$Lr   )r   )r   r   r   )r   Ngqq?)r5   NUUUUUU?c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )ra   r=   r>   r?   s       r   test_mle_trigram_scoresrd      s+    $ ==*00w?F.XXXr   c                 D    t        dd|      }|j                  |        |S )N皙?r#   r^   r	   r2   r(   r   r3   s      r   lidstone_bigram_modelri      s"    Sj9E	II"#Lr   g88?)r   Ng"u)?)r   Ngк{?)r5   NgL?c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )ri   r=   r>   r?   s       r   test_lidstone_bigram_scorerk      s2    4 	+11$@$G		r   c                     g d}d}d}t        j                  | j                  |      d      |k(  sJ t        j                  | j                  |      d      |k(  sJ y )NrG   rQ   )r   rH   )rH   r   )r   r   )r   r   g=,Ԛ]@g鷯1@r8   rJ   )ri   rZ   rN   rL   s       r    test_lidstone_entropy_perplexityrn      s_    D$ 	AJ==.66t<dCqHHH==.99$?F*TTTr   c                 D    t        dd|      }|j                  |        |S )Nrf   r,   r^   rg   r`   s      r   lidstone_trigram_modelrp      s"    Sj9E	II#$Lr   gqq?r   c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )rp   r=   r>   r?   s       r   test_lidstone_trigram_scorerr      s2     	,224A4H		r   c                 B    t        d|      }|j                  |        |S r/   )r   r2   rh   s      r   laplace_bigram_modelrt      s     A*-E	II"#Lr   gqq?)r   NgtE]t?)r   NgF]tE?)r5   NgF]tE?c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )rt   r=   r>   r?   s       r   test_laplace_bigram_scorerv      s-    6 	*00w?F.XXr   c                     g d}d}d}t        j                  | j                  |      d      |k(  sJ t        j                  | j                  |      d      |k(  sJ y )Nrm   gQ	@gݓz!@r8   rJ   )rt   rZ   rN   rL   s       r   &test_laplace_bigram_entropy_perplexityrx     s_    D$ 	AJ==-55d;TBaGGG==-88>ESSSr   c                 &    | j                   dk(  sJ y )Nr   )gamma)rt   s    r   test_laplace_gammar{   5  s    %%***r   c                 B    t        d|      }|j                  |        |S )Nr,   r0   )r   r2   r`   s      r   wittenbell_trigram_modelr}   9  s     "1<E	II#$Lr   )r   Ngqq?)r   Ng        grq?r   gqq?r   c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )r}   r=   r>   r?   s       r   test_wittenbell_trigram_scorer   @  s3    D 	.44T7CTJ		r   c                 D    t        dd|      }|j                  |        |S )Nr,   g      ?)r_   discountr   r   r2   r`   s      r   kneserney_trigram_modelr   r  s"    !DZPE	II#$Lr   )r   Ng$I$I?gm۶m?g$I$I?c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )r   r=   r>   r?   s       r   test_kneserney_trigram_scorer   y  s3    P 	-33D'BDI		r   c                 B    t        d|      }|j                  |        |S r]   )r   r2   r`   s      r   "absolute_discounting_trigram_modelr     s     +!
KE	II#$Lr   rb   g      ?c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )r   r=   r>   r?   s       r   'test_absolute_discounting_trigram_scorer     s3    J 	8>>tWMtT		r   c                 B    t        d|      }|j                  |        |S r]   )r
   r2   r`   s      r   stupid_backoff_trigram_modelr     s     j9E	II#$Lr   g      ?      ?g?c                 Z    t        j                  | j                  ||      d      |k(  sJ y r7   r9   )r   r=   r>   r?   s       r   !test_stupid_backoff_trigram_scorer     s2    2 	288wGN		r   c                 B    t        d|      }|j                  |        |S )Nr#   r^   r   rh   s      r   kneserney_bigram_modelr     s     !jAE	II"#Lr   model_fixturer4   ra   ri   rt   r}   r   r   r   z*Stupid Backoff is not a valid distribution)reason)marksr>   )	rV   rW   rU   )r   rH   rY   )r   )r)w)idsc                     |j                  |       t        fdj                  D              }t        j                  |d      dk(  sJ y )Nc              3   B   K   | ]  }j                  |        y wN)r<   ).0r   r>   r3   s     r   	<genexpr>z!test_sums_to_1.<locals>.<genexpr>!  s     JkU[[G4ks   gHz>r   )getfixturevaluesumvocabr:   r;   )r   r>   requestscores_for_contextr3   s    `  @r   test_sums_to_1r     sD    0 ##M2EJekkJJ==+T2c999r   c                 2    | j                  d      dk(  sJ y )Nr,   random_seedrH   generatera   s    r   test_generate_one_no_contextr   *  s    %%!%4???r   c                     | j                  dg      dk(  sJ | j                  ddg      dk(  sJ | j                  ddg      dk(  sJ y )Nr   	text_seedr   r   r   r   r   s    r   'test_generate_one_from_limiting_contextr   .  s`    %%%6#===%%c
%;sBBB%%c
%;sBBBr   c                 4    | j                  dd      dk(  sJ y )N)r   r   r#   r   r   r   r   r   s    r   %test_generate_one_from_varied_contextr   5  s!    %%!%LPSSSSr   c                     t        | j                  t        d            g}| j                  |       | j	                  ddd      g dk(  sJ y )Nbdbdbd   )r   r      r   )r   r   r   r   r   r   r   )r   r_   r%   r2   r   )ra   more_training_texts     r   test_generate_cycler   :  sV    +,=,C,CT(^TU,-%%a:1%M R   r   c                 :    | j                  ddd      g dk(  sJ y )Nr   )r   r   r,   r   )rH   r   r   r   rH   r   r   s    r   test_generate_with_text_seedr   K  s,    %%a<Q%O T   r   c                 V    | j                  dd      | j                  dd      k(  sJ y )N)aliensr,   r   r   r   r   s    r   test_generate_oov_text_seedr   U  s?    %%1 & 		#	#ja	#	HI I Ir   c                     t        j                  t              5  | j                  d       d d d        | j                  d d      | j                  d      k(  sJ y # 1 sw Y   2xY w)Nr   r   r,   r   r   )r:   raises	TypeErrorr   r   s    r   test_generate_None_text_seedr   [  sg    	y	!""W"5 
" %%A & 		#	#	#	23 3 3	 
"	!s   AA')<rB   r   r   operatorr   r:   nltk.lmr   r   r   r   r	   r
   r   r   nltk.lm.preprocessingr   fixturer   r!   r(   r-   r4   markparametrizer@   rE   rO   rS   r[   ra   rd   ri   rk   rn   rp   rr   rt   rv   rx   r{   r}   r   r   r   r   r   r   r   r   paramxfailr   r   r   r   r   r   r   r   r   r   r   <module>r      s      	 	 	 4 i N !N i B !B i H !H i H !H   #	seQ	seQ 	XX=S2>P(   # 	 
seQ"Y#"Y   # 
seY 	  	  	 '010U2   # 
seY	seY	sCj)$	sCj)$   # 
seW 	 	 	)232T2+   # 	 	 	 
se34
 
sCjLM 
sCj9;3<=<(   # 	 	 	 
se-/ 
sCjAB 
sCj24? #HI#H   # 	 	 	 
se79 
sCjKL 
sCj<>9 BC B   # 	 	 		seU	sCj%  
sCj=*!*+* i  ! ", *++##C $ 	
$ R1  
:%.:@CT
"I3r   