
    gB                     h    d Z ddlZddlmZ 	 ddlZdad	dZd
dZddZ	d Z
d Zy# e$ r dZY w xY w)aP  
A set of functions used to interface with the external megam_ maxent
optimization package. Before megam can be used, you should tell NLTK where it
can find the megam binary, using the ``config_megam()`` function. Typical
usage:

    >>> from nltk.classify import megam
    >>> megam.config_megam() # pass path to megam if not found in PATH # doctest: +SKIP
    [Found megam: ...]

Use with MaxentClassifier. Example below, see MaxentClassifier documentation
for details.

    nltk.classify.MaxentClassifier.train(corpus, 'megam')

.. _megam: https://www.umiacs.umd.edu/~hal/megam/index.html
    N)find_binaryc                 *    t        d| dgg dd      ay)aA  
    Configure NLTK's interface to the ``megam`` maxent optimization
    package.

    :param bin: The full path to the ``megam`` binary.  If not specified,
        then nltk will search the system for a ``megam`` binary; and if
        one is not found, it will raise a ``LookupError`` exception.
    :type bin: str
    megamMEGAM)z	megam.optr   	megam_686zmegam_i686.optz0https://www.umiacs.umd.edu/~hal/megam/index.html)env_varsbinary_namesurlN)r   
_megam_bin)bins    H/var/www/openai/venv/lib/python3.12/site-packages/nltk/classify/megam.pyconfig_megamr   )   s     J>J    c                   
 j                         }t        |      D ci c]  \  }}||
 }}}| D ]  \  
t        d      r,|j                  dj	                  
fd|D                     n|j                  d|   z         |st        j                  
      ||       n5|D ]0  }	|j                  d       t        j                  
|	      ||       2 |j                  d        yc c}}w )a  
    Generate an input file for ``megam`` based on the given corpus of
    classified tokens.

    :type train_toks: list(tuple(dict, str))
    :param train_toks: Training data, represented as a list of
        pairs, the first member of which is a feature dictionary,
        and the second of which is a classification label.

    :type encoding: MaxentFeatureEncodingI
    :param encoding: A feature encoding, used to convert featuresets
        into feature vectors. May optionally implement a cost() method
        in order to assign different costs to different class predictions.

    :type stream: stream
    :param stream: The stream to which the megam input file should be
        written.

    :param bernoulli: If true, then use the 'bernoulli' format.  I.e.,
        all joint features have binary values, and are listed iff they
        are true.  Otherwise, list feature values explicitly.  If
        ``bernoulli=False``, then you must call ``megam`` with the
        ``-fvals`` option.

    :param explicit: If true, then use the 'explicit' format.  I.e.,
        list the features that would fire for any of the possible
        labels, for each token.  If ``explicit=True``, then you must
        call ``megam`` with the ``-explicit`` option.
    cost:c              3   V   K   | ]   }t        j                  |             " y wN)strr   ).0lencoding
featuresetlabels     r   	<genexpr>z#write_megam_file.<locals>.<genexpr>i   s$     R6aX]]:ua@A6s   &)z%dz #
N)labels	enumeratehasattrwritejoin_write_megam_featuresencode)
train_toksr   stream	bernoulliexplicitr   ir   labelnumr   r   s    `     `  @r   write_megam_filer*   B   s    > __F+4V+<=+<Zaq+<H= (
E8V$LLR6RR LL/0 !(//*e"DfiX
 T"%hooj!&DfiX 
 	T- ( >s   C2c                 (   t         t        d      |sJ d       | j                         j                  d      }t        j                  |d      }|D ]=  }|j                         s|j                         \  }}t        |      |t        |      <   ? |S )z
    Given the stdout output generated by ``megam`` when training a
    model, return a ``numpy`` array containing the corresponding weight
    vector.  This function does not currently handle bias features.
    z.This function requires that numpy be installedznon-explicit not supported yetr   d)numpy
ValueErrorstripsplitzerosfloatint)sfeatures_countr'   linesweightslinefidweights           r   parse_megam_weightsr;   ~   s     }IJJ5558GGIOOD!Ekk.#.G::<**,KC %fGCH  Nr   c                     | st        d      | D ]I  \  }}|r+|dk(  r|j                  d|z         "|dk7  s(t        d      |j                  d| d|        K y )Nz:MEGAM classifier requires the use of an always-on feature.   z %sr   z3If bernoulli=True, then allfeatures must be binary. )r.   r    )vectorr%   r&   r9   fvals        r   r"   r"      sq    K
 	
 	TqyUS[) L  LL1SE4&)* r   c                    t        | t              rt        d      t        
t	                t        g| z   }t        j                  |t
        j                        }|j                         \  }}|j                  dk7  r t                t        |       t        d      t        |t              r|S |j                  d      S )z=
    Call the ``megam`` binary with the given arguments.
    z args should be a list of strings)stdoutr   zmegam command failed!zutf-8)
isinstancer   	TypeErrorr   r   
subprocessPopenPIPEcommunicate
returncodeprintOSErrordecode)argscmdprB   stderrs        r   
call_megamrQ      s     $:;; ,
CZ__5A}}VV 	||qf-..&#}}W%%r   r   )TT)T)__doc__rE   nltk.internalsr   r-   ImportErrorr   r   r*   r;   r"   rQ    r   r   <module>rV      sR   "  & 
29x$+"&E  Es   ' 11