
    g2                        d dl Z d dlmZ d dlmZ d dlmZ  eddg      Z e j                  e       e j                         d	               Z
e
j                  d
       e j                  dddd       e j                  ddddd       e j                  dddd       e j                  dddd       e j                  dddd      d                                            Zy)!    N)tqdm)word_tokenize)parallelize_preprocessz-hz--help)help_option_names)context_settingsc                       y )N r	       =/var/www/openai/venv/lib/python3.12/site-packages/nltk/cli.pyclir      s     	r
   tokenizez
--languagez-lenz1The language for the Punkt sentence tokenization.)defaulthelpz--preserve-lineTzIAn option to keep the preserve the sentence and not sentence tokenize it.)r   is_flagr   z--processesz-j   zNo. of processes.z
--encodingz-eutf8zSpecify encoding of file.z--delimiterz-d z%Specify delimiter to join the tokens.c           
         t        j                  d|      5 }t        j                  d|      5 }|dk(  rEt        |j                               D ](  }t	        |j                  t        |            d|       * nCt        t        |j                         |d      D ]  }t	        |j                  |      d|       ! d	d	d	       d	d	d	       y	# 1 sw Y   xY w# 1 sw Y   y	xY w)
z;This command tokenizes text stream using nltk.word_tokenizestdin)encodingstdoutr   
)endfileT)progress_barN)clickget_text_streamr   	readlinesprintjoinr   r   )	languagepreserve_line	processesr   	delimiterfinfoutlineoutlines	            r   tokenize_filer*      s    * 
		w	:c""8h?4 A~ 1D)..t)<=4dS 2  6!3==?ID G )..1t$G  @ 
;	:?? 
;	:s#   CBC>CC	CC$)r   r   nltkr   	nltk.utilr   dictCONTEXT_SETTINGSgroupversion_optionr   commandoptionr*   r	   r
   r   <module>r3      s      ,4*:;  ./	  0	 Z	<	 	T mT13FGlD&7RS4+RH T H &Hr
   