
    g2?                         d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d Zd Zd	 Z G d
 de      Zedk(  r	 d dlZ ej.                          yy)    N)ZipFilePathPointer)find_dir	find_filefind_jars_within_path)ParserI)DependencyGraph)taggedsents_to_conllc                  :    ddl m}   | g d      }|j                  S )Nr   )RegexpTagger))z\.$.)z\,$,)z\?$?)z\($()z\)$))z\[$[)z\]$])z^-?[0-9]+(\.[0-9]+)?$CD)z(The|the|A|a|An|an)$DT)z&(He|he|She|she|It|it|I|me|Me|You|you)$PRP)z(His|his|Her|her|Its|its)$PRP$)z(my|Your|your|Yours|yours)$r   )z (on|On|in|In|at|At|since|Since)$IN)z (for|For|ago|Ago|before|Before)$r   )z(till|Till|until|Until)$r   )z(by|By|beside|Beside)$r   )z(under|Under|below|Below)$r   )z(over|Over|above|Above)$r   )z (across|Across|through|Through)$r   )z(into|Into|towards|Towards)$r   )z(onto|Onto|from|From)$r   )z.*able$JJ)z.*ness$NN)z.*ly$RB)z.*s$NNS)z.*ing$VBG)z.*ed$VBD)z.*r   )nltk.tagr   tag)r   _taggers     D/var/www/openai/venv/lib/python3.12/site-packages/nltk/parse/malt.pymalt_regex_taggerr"      s!    %	
G@ ;;    c                 l   t         j                  j                  |       r| }nt        | d      }g d}t	        t        |            }|D ch c]$  }t         j                  j                  |      d   & }}h d}|j                  |      sJ t        t        d |            sJ t        |      S c c}w )zE
    A module to find MaltParser .jar file and its dependencies.
    )MALT_PARSER)env_vars) r'   r'      >   	log4j.jar
libsvm.jarliblinear-1.8.jarc                 J    | j                  d      xr | j                  d      S )Nzmaltparser-z.jar)
startswithendswith)is    r!   <lambda>z!find_maltparser.<locals>.<lambda>M   s    m4KF9KKr#   )ospathexistsr   setr   splitissubsetanyfilterlist)parser_dirname	_malt_dirmalt_dependencies
_malt_jarsjar_jarss         r!   find_maltparserr@   =   s     
ww~~n%"	^6FG	$*956J.89jsRWW]]3"jE9H%%e,,,KUS   
 :s   )B1c                 f    | yt         j                  j                  |       r| S t        | dd      S )z8
    A module to find pre-trained MaltParser model.
    malt_temp.mco)
MALT_MODELF)r&   verbose)r1   r2   r3   r   )model_filenames    r!   find_malt_modelrF   R   s2     		'/5QQr#   c                   Z    e Zd ZdZ	 	 	 	 d
dZddZddZddZedd       Z	ddZ
dd	Zy)
MaltParsera  
    A class for dependency parsing with MaltParser. The input is the paths to:
    - (optionally) a maltparser directory
    - (optionally) the path to a pre-trained MaltParser .mco model file
    - (optionally) the tagger to use for POS tagging before parsing
    - (optionally) additional Java arguments

    Example:
        >>> from nltk.parse import malt
        >>> # With MALT_PARSER and MALT_MODEL environment set.
        >>> mp = malt.MaltParser(model_filename='engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
        >>> # Without MALT_PARSER and MALT_MODEL environment.
        >>> mp = malt.MaltParser('/home/user/maltparser-1.9.2/', '/home/user/engmalt.linear-1.7.mco') # doctest: +SKIP
        >>> mp.parse_one('I shot an elephant in my pajamas .'.split()).tree() # doctest: +SKIP
        (shot I (elephant an) (in (pajamas my)) .)
    Nc                     t        |      | _        ||ng | _        t        |      | _        | j                  dk7  | _        t        j                         | _        ||| _
        yt               | _
        y)a  
        An interface for parsing with the Malt Parser.

        :param parser_dirname: The path to the maltparser directory that
            contains the maltparser-1.x.jar
        :type parser_dirname: str
        :param model_filename: The name of the pre-trained model with .mco file
            extension. If provided, training will not be required.
            (see http://www.maltparser.org/mco/mco.html and
            see http://www.patful.com/chalk/node/185)
        :type model_filename: str
        :param tagger: The tagger used to POS tag the raw string before
            formatting to CONLL format. It should behave like `nltk.pos_tag`
        :type tagger: function
        :param additional_java_args: This is the additional Java arguments that
            one can use when calling Maltparser, usually this is the heapsize
            limits, e.g. `additional_java_args=['-Xmx1024m']`
            (see https://goo.gl/mpDBvQ)
        :type additional_java_args: list
        NrB   )r@   	malt_jarsadditional_java_argsrF   model_trainedtempfile
gettempdirworking_dirr"   tagger)selfr:   rE   rQ   rK   s        r!   __init__zMaltParser.__init__r   sj    : )8 %9$D " 	! %^4


o5#..0 & 2f8I8Kr#   c           
   #   v  K   | j                   st        d      t        j                  d| j                  dd      5 }t        j                  d| j                  dd      5 }t        |      D ]  }|j                  t        |              |j                          | j                  |j                  |j                  d      }t        j                         }	 t        j                  t        j                  j                  | j                         d	          | j#                  ||      }	t        j                  |       |	d	k7  rt        d
dj%                  |      |	fz        t'        |j                        5 }
|
j)                         j                  d      D ]  }t+        t-        ||      g        	 ddd       ddd       ddd       t        j.                  j                         t        j.                  j                         y#  Y xY w# 1 sw Y   _xY w# 1 sw Y   cxY w# 1 sw Y   gxY ww)a  
        Use MaltParser to parse multiple POS tagged sentences. Takes multiple
        sentences where each sentence is a list of (word, tag) tuples.
        The sentences must have already been tokenized and tagged.

        :param sentences: Input sentences to parse
        :type sentence: list(list(tuple(str, str)))
        :return: iter(iter(``DependencyGraph``)) the dependency graph
            representation of each sentence
        z0Parser has not been trained. Call train() first.zmalt_input.conll.wFprefixdirmodedeletezmalt_output.conll.parserY   r   z0MaltParser parsing (%s) failed with exit code %d z

top_relation_labelN)rM   	ExceptionrN   NamedTemporaryFilerP   r	   writestrclosegenerate_malt_commandnamer1   getcwdchdirr2   r5   rL   _executejoinopenreaditerr   remove)rR   	sentencesrD   r_   
input_fileoutput_filelinecmd_current_pathretinfiletree_strs               r!   parse_tagged_sentszMaltParser.parse_tagged_sents   s     }}NOO((&D,<,<3u
,,+$$	
 0;D$$SY/ <  " 00OO[%5%5G 1  !#		HHRWW]]4::6q9: mmC1'!8#"%(XXc]C$89  +**+v$*KKM$7$7$? $3(0EW%&!" %@ ,E
b 			*//"
		+""#3 ,+E 
 
sg   ;H9$H-!A7H!?HA H!8>H7H!?H-AH9HH!HH!!H*	&H--H62H9c                 B      fd|D        } j                  |||      S )an  
        Use MaltParser to parse multiple sentences.
        Takes a list of sentences, where each sentence is a list of words.
        Each sentence will be automatically tagged with this
        MaltParser instance's tagger.

        :param sentences: Input sentences to parse
        :type sentence: list(list(str))
        :return: iter(DependencyGraph)
        c              3   @   K   | ]  }j                  |        y w)N)rQ   ).0sentencerR   s     r!   	<genexpr>z)MaltParser.parse_sents.<locals>.<genexpr>   s     L)hDKK1)s   r^   )rx   )rR   ro   rD   r_   tagged_sentencess   `    r!   parse_sentszMaltParser.parse_sents   s1     M)L&&g:L ' 
 	
r#   c                    dg}|| j                   z  }t        j                  j                  d      rdnd}|d|j	                  | j
                        gz  }|dgz  }t        j                  j                  | j                        r2|dt        j                  j                  | j                        d   gz  }n|d| j                  gz  }|d	|gz  }|d
k(  r|d|gz  }|d|gz  }|S )a  
        This function generates the maltparser command use at the terminal.

        :param inputfilename: path to the input file
        :type inputfilename: str
        :param outputfilename: path to the output file
        :type outputfilename: str
        javawin;:z-cpzorg.maltparser.Maltz-cz-ir[   z-oz-m)rK   sysplatformr-   rj   rJ   r1   r2   r3   rL   r5   )rR   inputfilenameoutputfilenamerY   rs   classpaths_separators         r!   re   z MaltParser.generate_malt_command   s     ht(((&)ll&=&=e&Ds# %%dnn5
 	
 	%&& 77>>$**%D"''--

3B788CD$**%%Cm$$7?D.))Cd|
r#   c                 z    |rd nt         j                  }t        j                  | ||      }|j                         S )N)stdoutstderr)
subprocessPIPEPopenwait)rs   rD   outputps       r!   ri   zMaltParser._execute  s.     jooS?vvxr#   c                 P   t        j                  d| j                  dd      5 }dj                  d |D              }|j	                  t        |             ddd       | j                  j                  |       t        j                  |j                         y# 1 sw Y   FxY w)	z
        Train MaltParser from a list of ``DependencyGraph`` objects

        :param depgraphs: list of ``DependencyGraph`` objects for training input data
        :type depgraphs: DependencyGraph
        malt_train.conll.rU   FrV   
c              3   >   K   | ]  }|j                  d         yw)
   N)to_conll)r{   dgs     r!   r}   z#MaltParser.train.<locals>.<genexpr>   s     !FIb"++b/Is   NrD   )
rN   ra   rP   rj   rb   rc   train_from_filerf   r1   rn   )rR   	depgraphsrD   rp   	input_strs        r!   trainzMaltParser.train  s     ((&D,<,<3u
		!FI!FFIS^,	
 	Z__g>
		*//"
 
s   3BB%c                    t        |t              rt        j                  d| j                  dd      5 }|j                         5 }|j                         }|j                  t        |             ddd       | j                  |j                  |      cddd       S | j                  |d      }| j                  ||      }|d	k7  rt        d
dj                  |      |fz        d| _        y# 1 sw Y   xY w# 1 sw Y   fxY w)z
        Train MaltParser from a file
        :param conll_file: str for the filename of the training input data
        :type conll_file: str
        r   rU   FrV   Nr   learnr\   r   z1MaltParser training (%s) failed with exit code %dr]   T)
isinstancer   rN   ra   rP   rk   rl   rb   rc   r   rf   re   ri   r`   rj   rM   )rR   
conll_filerD   rp   conll_input_file	conll_strrs   ru   s           r!   r   zMaltParser.train_from_file'  s     j"45,,*0@0@sSX__&*: 0 5 5 7I$$S^4 ' ++JOOW+M  (('(BmmC)!8 XXc]C01   '& s#   C;+C/0$C;/C8	4C;;D)r'   NNN)Fnull)NN)F)__name__
__module____qualname____doc__rS   rx   r   re   staticmethodri   r   r    r#   r!   rH   rH   ^   sL    * !(LT@$D
 @  
#&r#   rH   __main__)inspectr1   r   r   rN   	nltk.datar   nltk.internalsr   r   r   nltk.parse.apir   nltk.parse.dependencygraphr   nltk.parse.utilr	   r"   r@   rF   rH   r   doctesttestmodr   r#   r!   <module>r      sp     	  
  ( E E " 6 0#L*	Rc cL z@D GOOK r#   