
    g                     2    d dl mZ d dlmZ  G d de      Zy)    )load)StemmerIc                   (    e Zd ZdZd Zd Zd Zd Zy)RSLPStemmerug  
    A stemmer for Portuguese.

        >>> from nltk.stem import RSLPStemmer
        >>> st = RSLPStemmer()
        >>> # opening lines of Erico Verissimo's "Música ao Longe"
        >>> text = '''
        ... Clarissa risca com giz no quadro-negro a paisagem que os alunos
        ... devem copiar . Uma casinha de porta e janela , em cima duma
        ... coxilha .'''
        >>> for token in text.split(): # doctest: +NORMALIZE_WHITESPACE
        ...     print(st.stem(token))
        clariss risc com giz no quadro-negr a pais que os alun dev copi .
        uma cas de port e janel , em cim dum coxilh .
    c                 ^   g | _         | j                   j                  | j                  d             | j                   j                  | j                  d             | j                   j                  | j                  d             | j                   j                  | j                  d             | j                   j                  | j                  d             | j                   j                  | j                  d             | j                   j                  | j                  d             y )Nzstep0.ptzstep1.ptzstep2.ptzstep3.ptzstep4.ptzstep5.ptzstep6.pt)_modelappend	read_rule)selfs    C/var/www/openai/venv/lib/python3.12/site-packages/nltk/stem/rslp.py__init__zRSLPStemmer.__init__5   s    4>>*564>>*564>>*564>>*564>>*564>>*564>>*56    c           	      ~   t        d|z   d      j                  d      }|j                  d      }|D cg c]
  }|dk7  s	| }}|D cg c]  }|d   dk7  s| }}|D cg c]  }|j                  d	d
       }}g }|D ]  }g }|j                  d
      }|j	                  |d   dd        |j	                  t        |d                |j	                  |d   dd        |j	                  |d   j                  d      D cg c]  }|dd 	 c}       |j	                  |        |S c c}w c c}w c c}w c c}w )Nznltk:stemmers/rslp/raw)formatutf8
 r   #z			         ,)r   decodesplitreplacer	   int)r   filenameruleslineslineruletokenstokens           r   r
   zRSLPStemmer.read_rule@   sJ   *X5eDKKFSD!"'6%$42:%6"':%$47c>%: 9>>fd+> DDZZ%F KKq	!B( KKF1I' KKq	!B( KK&)//#2FG2Fq2FGH LL# & 7 7: ?$ Hs#   
D+ D+
D0D0"D5D:
c                 ^   |j                         }|d   dk(  r| j                  |d      }|d   dk(  r| j                  |d      }| j                  |d      }| j                  |d      }|}| j                  |d      }||k(  r+|}| j                  |d	      }||k(  r| j                  |d
      }|S )Nr   sr   ar   r   r            )lower
apply_rule)r   word	prev_words      r   stemzRSLPStemmer.stema   s    zz| 8s???4+D 8s???4+D tQ' tQ' 	tQ'9I??4+Dy tQ/r   c                     | j                   |   }|D ]H  }t        |d         }|| d  |d   k(  st        |      ||d   z   k\  s3||d   vs;|d |  |d   z   } |S  |S )Nr   r   r   r   )r   len)r   r/   
rule_indexr!   r$   suffix_lengths         r   r.   zRSLPStemmer.apply_rule   s    J'DQLM]NO$Q/t9Q 7747*#Om^4tAw>  r   N)__name__
__module____qualname____doc__r   r
   r1   r.    r   r   r   r   $   s     	7B<
r   r   N)	nltk.datar   nltk.stem.apir   r   r:   r   r   <module>r=      s   @  "e( er   