
    g.                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ  G d de j                        Z
 G d d	e j                        Zy)
    N)closing)data)PorterStemmer)SnowballStemmerc                   *    e Zd Zd Zd Zd Zd Zd Zy)SnowballTestc                 6   t        dd      }|j                  d      dk(  sJ |j                  d      dk(  sJ |j                  d      dk(  sJ |j                  d      d	k(  sJ |j                  d
      d	k(  sJ |j                  d      d	k(  sJ |j                  d      d	k(  sJ |j                  d      dk(  sJ |j                  d      dk(  sJ t        dd      }|j                  d      dk(  sJ |j                  d      d	k(  sJ |j                  d      dk(  sJ t        d      }|j                  d      dk(  sJ |j                  d      dk(  sJ |j                  d      dk(  sJ |j                  d      d	k(  sJ |j                  d      dk(  sJ y)z
        this unit testing for test the snowball arabic light stemmer
        this stemmer deals with prefixes and suffixes
        arabicTu&   الْعَرَبِــــــيَّةu   عربu   العربيةu   فقالواu   قالu   الطالباتu   طالبu   فالطالباتu   والطالباتu   الطالبونu   اللذانu   منFu   اللذu   الكلماتu   كلمNr   stem)self
ar_stemmers     M/var/www/openai/venv/lib/python3.12/site-packages/nltk/test/unit/test_stem.pytest_arabiczSnowballTest.test_arabic
   s    %Xt4
GHHTTT/0H<<<~.(:::12j@@@34
BBB34
BBB12j@@@~..@@@v&&000$Xu5
~.*<<<12j@@@/0H<<<$X.
GHHTTT/0H<<<~.(:::12j@@@/0H<<<    c                 F    t        d      }|j                  d      dk(  sJ y )Nrussianu   авантненькаяu   авантненькr   )r   stemmer_russians     r   test_russianzSnowballTest.test_russian'   s'    ))4##$>?CYYYYr   c                     t        d      }t        dd      }|j                  d      dk(  sJ |j                  d      dk(  sJ |j                  d      dk(  sJ |j                  d      dk(  sJ y )NgermanT)ignore_stopwordsu	   Schränkeschrankkeinenkeinr   )r   stemmer_germanstemmer_german2s      r   test_germanzSnowballTest.test_german+   s|    (2)(TJ""=1Y>>>##M2i???""8,666##H-999r   c                 r    t        d      }|j                  d      dk(  sJ |j                  d      dk(  sJ y )Nspanish	Visionadovisionalguealgur   r   stemmers     r   test_spanishzSnowballTest.test_spanish5   s<    !),||K(H444 ||G$...r   c                 F    t        d      }|j                  d      dk(  sJ y )Nenglishzy'syr   r%   s     r   test_short_strings_bugz#SnowballTest.test_short_strings_bug=   s#    !),||E"c)))r   N)__name__
__module____qualname__r   r   r   r'   r+    r   r   r   r   	   s    =:Z:/*r   r   c                   6    e Zd Zd Zd Zd Zd Zd Zd Zd Z	y)	
PorterTestc                     t        t        j                  d      j                  d            5 }|j	                         j                         cd d d        S # 1 sw Y   y xY w)Nz*stemmers/porter_test/porter_vocabulary.txtutf-8encoding)r   r   findopenread
splitlinesr   fps     r   _vocabularyzPorterTest._vocabularyC   sR    IIBCHH  I 
 779'')
 
 
s   AA c                     t        |      }t        | j                         |      D ]4  \  }}|j                  |      }||k(  rJ dj	                  ||||              y )N)modez*{} should stem to {} in {} mode but got {})r   zipr<   r   format)r   stemmer_modeexpected_stemsr&   word	true_stemour_stems          r   _test_against_expected_outputz(PorterTest._test_against_expected_outputK   sh    \2"4#3#3#5~FOD)||D)HI%;BB	%  Gr   c                    t        t        j                  d      j                  d            5 }| j	                  t
        j                  |j                         j                                ddd       y# 1 sw Y   yxY w)az  Tests all words from the test vocabulary provided by M Porter

        The sample vocabulary and output were sourced from
        https://tartarus.org/martin/PorterStemmer/voc.txt and
        https://tartarus.org/martin/PorterStemmer/output.txt
        and are linked to from the Porter Stemmer algorithm's homepage
        at https://tartarus.org/martin/PorterStemmer/
        z-stemmers/porter_test/porter_martin_output.txtr3   r4   N)	r   r   r6   r7   rF   r   MARTIN_EXTENSIONSr8   r9   r:   s     r   test_vocabulary_martin_modez&PorterTest.test_vocabulary_martin_modeX   sj     IIEFKK  L 
 ..//1E1E1G
 
 
   =A55A>c                    t        t        j                  d      j                  d            5 }| j	                  t
        j                  |j                         j                                d d d        y # 1 sw Y   y xY w)Nz+stemmers/porter_test/porter_nltk_output.txtr3   r4   )	r   r   r6   r7   rF   r   NLTK_EXTENSIONSr8   r9   r:   s     r   test_vocabulary_nltk_modez$PorterTest.test_vocabulary_nltk_modej   sh    IICDII  J 
 ..--rwwy/C/C/E
 
 
rJ   c                    t        t        j                  d      j                  d            5 }| j	                  t
        j                  |j                         j                                d d d        | j	                  t
        j                  t        j                  d      j                  d      j                         j                                y # 1 sw Y   ixY w)Nz/stemmers/porter_test/porter_original_output.txtr3   r4   )	r   r   r6   r7   rF   r   ORIGINAL_ALGORITHMr8   r9   r:   s     r   test_vocabulary_original_modez(PorterTest.test_vocabulary_original_modet   s     IIGHMM  N 
 ..00"'')2F2F2H
 	**,,IIGHT7T#TVZ\	

 
s   =CCc                 @    t               j                  d      dk(  sJ y)zTest for bug https://github.com/nltk/nltk/issues/1581

        Ensures that 'oed' can be stemmed without throwing an error.
        oedoNr   r   )r   s    r   test_oed_bugzPorterTest.test_oed_bug   s    
 ##E*c111r   c                     t               }|j                  d      dk(  sJ |j                  d      dk(  sJ |j                  dd      dk(  sJ |j                  d      dk(  sJ |j                  dd      dk(  sJ y	)
zTest for improvement on https://github.com/nltk/nltk/issues/2507

        Ensures that stems are lowercased when `to_lowercase=True`
        OnonIiF)to_lowercaseGithubgithubNrT   )r   porters     r   test_lowercase_optionz PorterTest.test_lowercase_option   s    
 {{4 D((({{33&&&{{3U{3s:::{{8$000{{8%{8HDDDr   N)
r,   r-   r.   r<   rF   rI   rM   rP   rU   r_   r/   r   r   r1   r1   B   s&    *$
02
Er   r1   )unittest
contextlibr   nltkr   nltk.stem.porterr   nltk.stem.snowballr   TestCaser   r1   r/   r   r   <module>rf      s=       * .6*8$$ 6*r[E"" [Er   