
    g                     x    d dl mZ d dlmZ d dlmZmZ ej                  Z eedddd      Z	de	_
        d	 Zd
 Zy)    )partial)chain)
everygramspad_sequenceTz<s>z</s>)pad_leftleft_pad_symbol	pad_rightright_pad_symbolzPads both ends of a sentence to length specified by ngram order.

    Following convention <s> pads the start of sentence </s> pads its end.
    c                 D    t        t        t        ||             |       S )zpHelper with some useful defaults.

    Applies pad_both_ends to sentence and follows it up with everygrams.
    nmax_len)r   listpad_both_ends)ordersentences     J/var/www/openai/venv/lib/python3.12/site-packages/nltk/lm/preprocessing.pypadded_everygramsr      s    
 d=U;<eLL    c                 j     t        t                fd|D        t        t        |            fS )a  Default preprocessing for a sequence of sentences.

    Creates two iterators:

    - sentences padded and turned into sequences of `nltk.util.everygrams`
    - sentences padded as above and chained together for a flat stream of words

    :param order: Largest ngram length produced by `everygrams`.
    :param text: Text to iterate over. Expected to be an iterable of sentences.
    :type text: Iterable[Iterable[str]]
    :return: iterator over text as ngrams, iterator over text as vocabulary data
    r   c              3   V   K   | ]   }t        t         |                    " yw)r   N)r   r   ).0sentr   
padding_fns     r   	<genexpr>z,padded_everygram_pipeline.<locals>.<genexpr>1   s&     LttDD)*E	:	:ts   &))r   r   flattenmap)r   textr   s   ` @r   padded_everygram_pipeliner    "   s2     %0JLtLJ%& r   N)	functoolsr   	itertoolsr   	nltk.utilr   r   from_iterabler   r   __doc__r   r     r   r   <module>r'      sI      .


 Mr   