
    g                     Z    d dl Z d dlmZ d dlmZ d dlmZ  G d de j                        Zy)    N)Counter)timeit)
Vocabularyc                       e Zd ZdZed        Zd Zd Zd Zd Z	d Z
d Zd	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Zd Z ej4                  d      d        Zy)NgramModelVocabularyTestsztests Vocabulary Classc                 ,    t        g dd      | _        y N)zabcfdegr   r   r   r   w   
unk_cutoff)r   vocab)clss    V/var/www/openai/venv/lib/python3.12/site-packages/nltk/test/unit/lm/test_vocabulary.py
setUpClassz$NgramModelVocabularyTests.setUpClass   s    M
	    c                 :    | j                  | j                         y N)
assertTruer   selfs    r   test_truthinessz)NgramModelVocabularyTests.test_truthiness   s    

#r   c                 P    | j                  | j                  j                  d       y )Nr   )assertEqualr   cutoffr   s    r   test_cutoff_value_set_correctlyz9NgramModelVocabularyTests.test_cutoff_value_set_correctly   s    **A.r   c                 z    | j                  t              5  d| j                  _        d d d        y # 1 sw Y   y xY w)N   )assertRaisesAttributeErrorr   r#   r   s    r   test_unable_to_change_cutoffz6NgramModelVocabularyTests.test_unable_to_change_cutoff   s(    ~. !DJJ /..s   1:c                     | j                  t              5 }t        dd       d d d        d}| j                  |t	        j
                               y # 1 sw Y   1xY w)Nabcr   r   z*Cutoff value cannot be less than 1. Got: 0)r'   
ValueErrorr   r"   str	exception)r   exc_infoexpected_error_msgs      r   test_cutoff_setter_checks_valuez9NgramModelVocabularyTests.test_cutoff_setter_checks_value#   sL    z*hu+ +I+S1C1C-DE +*s   AAc                     | j                  | j                  j                  d   d       | j                  | j                  j                  d   d       | j                  | j                  j                  d   d       y )Nr   r   r   r      )r"   r   countsr   s    r   test_counts_set_correctlyz3NgramModelVocabularyTests.test_counts_set_correctly)   s_    **3/3**3/3**3/3r   c                     | j                  d| j                  v        | j                  d| j                  v        | j                  d| j                  v        y )Nr   r   r
   )r   r   assertFalser   s    r   %test_membership_check_respects_cutoffz?NgramModelVocabularyTests.test_membership_check_respects_cutoff.   sE    tzz)*

*+

*+r   c                 N    | j                  dt        | j                               y )N   )r"   lenr   r   s    r   test_vocab_len_respects_cutoffz8NgramModelVocabularyTests.test_vocab_len_respects_cutoff6   s     	C

O,r   c                     g d}g d}| j                  |t        | j                  j                  j	                                      | j                  |t        | j                               y )N)	r   r   r   r   r   r   r   r   r
   )r   r   r   r   <UNK>)assertCountEquallistr   r4   keys)r   vocab_countsvocab_itemss      r   test_vocab_iter_respects_cutoffz9NgramModelVocabularyTests.test_vocab_iter_respects_cutoff;   sM    D3lD1B1B1G1G1I,JKk4

+;<r   c                    t        d      }| j                  t        |      d       | j                  |       | j	                  |j
                  |       |j                  t        d             | j	                  |j
                  |       y )Nr   r   r   abcde)r   r"   r;   r7   assertIn	unk_labelupdater@   )r   emptys     r   test_update_empty_vocabz1NgramModelVocabularyTests.test_update_empty_vocabB   sb    a(UQ'eoou-T']#eoou-r   c                     | j                  | j                  j                  d      d       | j                  | j                  j                  d      d       y )Nr   r   r>   r"   r   lookupr   s    r   test_lookupz%NgramModelVocabularyTests.test_lookupK   s@    **3/5**3/9r   c           
         | j                  | j                  j                  ddg      d       | j                  | j                  j                  d      d       | j                  | j                  j                  d      d       | j                  | j                  j                  t        t        t        d                  d       y )Nr   r   r   r   )r   r   r   r>   r&   )r>   r>   r>   )r"   r   rN   mapr-   ranger   s    r   test_lookup_iterablesz/NgramModelVocabularyTests.test_lookup_iterablesO   s    **C:6
C**:6
C**:6GJJc#uQx013N	
r   c           	         | j                  | j                  j                  d      d       | j                  | j                  j                  g       d       | j                  | j                  j                  t        g             d       | j                  | j                  j                  d t	        dd      D              d       y )N c              3       K   | ]  }|  y wr   rW   ).0ns     r   	<genexpr>zHNgramModelVocabularyTests.test_lookup_empty_iterables.<locals>.<genexpr>[   s     *Bk1ks   r   )r"   r   rN   iterrT   r   s    r   test_lookup_empty_iterablesz5NgramModelVocabularyTests.test_lookup_empty_iterablesW   s    **2.3**2.3**484b9***BeAqk*BBBGr   c                 &   | j                  | j                  j                  ddgddgg      d       | j                  | j                  j                  ddgdg      d       | j                  | j                  j                  ddggggg      d       y )Nr   r   r   )rQ   rR   )rQ   r>   ))))rQ   rM   r   s    r   test_lookup_recursivez/NgramModelVocabularyTests.test_lookup_recursive]   s    JJSzC:679U	
 	**S#J+<=?TU**Szl^,<+=>@VWr   c                 .   | j                  t              5  | j                  j                  d        d d d        | j                  t              5  t	        | j                  j                  d d g             d d d        y # 1 sw Y   NxY w# 1 sw Y   y xY wr   r'   	TypeErrorr   rN   r@   r   s    r   test_lookup_Nonez*NgramModelVocabularyTests.test_lookup_Noned   sh    y)JJd# *y)""D$<01 *) *)))   A?'B?BBc                 .   | j                  t              5  | j                  j                  d       d d d        | j                  t              5  t	        | j                  j                  ddg             d d d        y # 1 sw Y   NxY w# 1 sw Y   y xY w)Nr3   r   ra   r   s    r   test_lookup_intz)NgramModelVocabularyTests.test_lookup_intj   sh    y)JJa  *y)""Aq6*+ *) *)))rd   c                 Z    | j                  | j                  j                  d      d       y )N r>   rM   r   s    r   test_lookup_empty_strz/NgramModelVocabularyTests.test_lookup_empty_strp   s!    **2.8r   c                     t        g dd      }t        g dd      }t        g ddd      }t        ddgd      }| j                  ||       | j                  ||       | j                  ||       y )N)r   r   r   r3   r   blah)r   rH   r   r   )r   r"   assertNotEqual)r   v1v2v3v4s        r   test_eqalityz&NgramModelVocabularyTests.test_eqalitys   sh    A6A6AHc
q1R B#B#r   c                 N    | j                  t        | j                        d       y )Nz8<Vocabulary with cutoff=2 unk_label='<UNK>' and 5 items>)r"   r-   r   r   s    r   test_strz"NgramModelVocabularyTests.test_str}   s    

OW	
r   c           	      h    | j                  | j                  t        t        g d      d             y r	   )r"   r   r   r   r   s    r   test_creation_with_counterz4NgramModelVocabularyTests.test_creation_with_counter   s/    JJU 		
r   z?Test is known to be flaky as it compares (runtime) performance.)reasonc                     t        d      }ddlm} t        |j                               }t	        dt                     }t	        dt                     }| j                  ||d       y )	NrF   r   )englishzlen(small_vocab))globalszlen(large_vocab)r3   )places)r   nltk.corpus.europarl_rawrx   wordsr   localsassertAlmostEqual)r   small_vocabrx   large_vocabsmall_vocab_len_timelarge_vocab_len_times         r   test_len_is_constantz.NgramModelVocabularyTests.test_len_is_constant   s]    
 !)4 1  &&8&(K%&8&(K 	35IRSTr   N)__name__
__module____qualname____doc__classmethodr   r    r$   r)   r1   r5   r8   r<   rD   rK   rO   rU   r]   r_   rc   rf   ri   rq   rs   ru   unittestskipr   rW   r   r   r   r      s     
 
$/"F4
,-
=.:
HX2,9$

	
 X]]PUUr   r   )r   collectionsr   r   nltk.lmr   TestCaser   rW   r   r   <module>r      s)       MU 1 1 MUr   