
    g1                         d Z ddlZddlZddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZ ddlmZ dag dZddZd	 Z G d
 de
      Z G d d      Zedk(  rddlmZmZ d Z eee      Zyy)z;
Classifiers that make use of the external 'Weka' package.
    N)stdin)ClassifierI)config_javajava)DictionaryProbDist).z/usr/share/wekaz/usr/local/share/wekaz/usr/lib/wekaz/usr/local/lib/wekac                     t                | | at        t        }dt        j                  v r#|j                  dt        j                  d          |D ]  }t        j                  j                  t        j                  j                  |d            sAt        j                  j                  |d      at        t              }|rt        dt         d| d       nt        dt        z         t        t                t        t        d      y )	NWEKAHOMEr   zweka.jarz[Found Weka: z
 (version z)]z[Found Weka: %s]zUnable to find weka.jar!  Use config_weka() or set the WEKAHOME environment variable. For more information about Weka, please see https://www.cs.waikato.ac.nz/ml/weka/)r   _weka_classpath_weka_searchosenvironinsertpathexistsjoin_check_weka_versionprintLookupError)	classpath
searchpathr   versions       G/var/www/openai/venv/lib/python3.12/site-packages/nltk/classify/weka.pyconfig_wekar   "   s     M#!
#aJ!78Dww~~bggll4<="$'',,tZ"@-o>M/):*WIRPQ,>?#O4  4
 	
     c                    	 t        j                  |       }	 	 |j	                  d      |j                          S # t        t        f$ r   Y y xY w# t        $ r Y |j                          y w xY w# |j                          w xY w)Nzweka/core/version.txt)zipfileZipFile
SystemExitKeyboardInterruptreadcloseKeyError)jarzfs     r   r   r   C   s|    __S!
	7723 	
 )*   	

	 	
s,   : A A	A,A/ +A,,A/ /Bc                   Z    e Zd Zd Zd Zd Zd Zd Zd Zddd	d
dddZ	e
dg dfd       Zy)WekaClassifierc                      || _         || _        y N)
_formatter_model)self	formattermodel_filenames      r   __init__zWekaClassifier.__init__T   s    #$r   c                 *    | j                  |g d      S )N)-p0z-distribution_classify_manyr,   featuresetss     r   prob_classify_manyz!WekaClassifier.prob_classify_manyX   s    "";0LMMr   c                 *    | j                  |ddg      S )Nr1   r2   r3   r5   s     r   classify_manyzWekaClassifier.classify_many[   s    "";s<<r   c           	         t                t        j                         }	 t        j                  j                  |d      }| j                  j                  ||       dd| j                  d|g|z   }t        |t        t        j                  t        j                        \  }}|r|sd|v rt        d      t        d|z        | j                  |j                  t         j"                        j%                  d	            t        j&                  |      D ]5  }t        j(                  t        j                  j                  ||             7 t        j*                  |       S # t        j&                  |      D ]5  }t        j(                  t        j                  j                  ||             7 t        j*                  |       w xY w)
Nz	test.arff!weka.classifiers.bayes.NaiveBayesz-lz-T)r   stdoutstderrzIllegal options: -distributionzOThe installed version of weka does not support probability distribution output.z"Weka failed to generate output:
%s
)r   tempfilemkdtempr   r   r   r*   writer+   r   r   
subprocessPIPE
ValueErrorparse_weka_outputdecoder   encodingsplitlistdirremovermdir)	r,   r6   optionstemp_dirtest_filenamecmdr<   r=   fs	            r   r4   zWekaClassifier._classify_many^   sc   ##%%	GGLL;?MOO!!-= 4 C  $)!!	 VV f3v=$"  %%JV%STT ))&--*G*M*Md*ST ZZ)		"'',,x34 *HHX ZZ)		"'',,x34 *HHXs   CE" "A$Gc                     t        j                  d|      D cg c]  }|j                         st        |        }}t	        t        | j                  j                         |            }t        |      S c c}w )Nz[*,]+)	rerH   stripfloatdictzipr*   labelsr   )r,   svprobss       r   parse_weka_distributionz&WekaClassifier.parse_weka_distribution   s^    #%88GQ#7E#7a1779q#7ES//159:!%(( Fs
   A6A6c                    t        |      D ]+  \  }}|j                         j                  d      s&||d  } n |d   j                         g dk(  rG|dd  D cg c]8  }|j                         s|j                         d   j                  d      d   : c}S |d   j                         g dk(  rC|dd  D cg c]4  }|j                         r"| j	                  |j                         d         6 c}S t        j                  d	|d         r2|D cg c]&  }|j                         s|j                         d   ( c}S |d d
 D ]  }t        |        t        d|d   z        c c}w c c}w c c}w )Ninst#r   )r]   actual	predictederror
prediction      :)r]   r^   r_   r`   distributionz^0 \w+ [01]\.[0-9]* \?\s*$
   zRUnhandled output format -- your version of weka may not be supported.
  Header: %s)		enumeraterS   
startswithrH   r[   rR   matchr   rD   )r,   linesilines       r   rE   z WekaClassifier.parse_weka_output   sm    'GAtzz|&&w/ab	 (
 8>>VV>CABiXid4::<DJJLO))#.q1iXX1X^^ "
 
 "!"I%D::< ,,TZZ\"-=>%  XX3U1X>05FDJJLOFF cr
d #!&q* + Y Gs   E%/'E%89E*E/'E/r;   zweka.classifiers.trees.J48z#weka.classifiers.functions.Logisticzweka.classifiers.functions.SMOzweka.classifiers.lazy.KStarzweka.classifiers.rules.JRip)
naivebayesC4.5log_regressionsvmkstarripperrn   Tc           	         t                t        j                  |      }t        j                         }	 t
        j                  j                  |d      }|j                  ||       || j                  v r| j                  |   }	n-|| j                  j                         v r|}	nt        d|z        |	d|d|g}
|
t        |      z  }
|rt        j                  }nd }t        |
t         |       t#        ||      t        j$                  |      D ]5  }t        j&                  t
        j                  j                  ||             7 t        j(                  |       S # t        j$                  |      D ]5  }t        j&                  t
        j                  j                  ||             7 t        j(                  |       w xY w)Nz
train.arffzUnknown classifier %sz-dz-t)r   r<   )r   ARFF_Formatter
from_trainr?   r@   r   r   r   rA   _CLASSIFIER_CLASSvaluesrD   listrB   rC   r   r   r'   rI   rJ   rK   )clsr.   r6   
classifierrL   quietr-   rM   train_filename	javaclassrO   r<   rP   s                r   trainzWekaClassifier.train   s^    	 #--k:	##%	WW\\(LANOONK8S22211*=	s44;;==&	 !8:!EFF dND.IC4= C#? ")^< ZZ)		"'',,x34 *HHX ZZ)		"'',,x34 *HHXs   CE A$G N)__name__
__module____qualname__r/   r7   r9   r4   r[   rE   rw   classmethodr    r   r   r'   r'   S   s^    %N=*X)
!b :,?/./ 
  * *r   r'   c                   L    e Zd ZdZd Zd Zd Zd Zed        Z	d Z
dd	Zd
 Zy)ru   z
    Converts featuresets and labeled featuresets to ARFF-formatted
    strings, appropriate for input into Weka.

    Features and classes can be specified manually in the constructor, or may
    be determined from data using ``from_train``.
    c                      || _         || _        y)a)  
        :param labels: A list of all class labels that can be generated.
        :param features: A list of feature specifications, where
            each feature specification is a tuple (fname, ftype);
            and ftype is an ARFF type string such as NUMERIC or
            STRING.
        N)_labels	_features)r,   rW   featuress      r   r/   zARFF_Formatter.__init__   s     !r   c                 F    | j                         | j                  |      z   S )zBReturns a string representation of ARFF output for the given data.)header_sectiondata_section)r,   tokenss     r   formatzARFF_Formatter.format
  s!    ""$t'8'8'@@@r   c                 ,    t        | j                        S )zReturns the list of classes.)ry   r   )r,   s    r   rW   zARFF_Formatter.labels  s    DLL!!r   c                     t        |d      st        |d      }|j                  | j                  |             |j	                          y)z.Writes ARFF data to a file for the given data.rA   wN)hasattropenrA   r   r"   )r,   outfiler   s      r   rA   zARFF_Formatter.write  s6    w(7C(Gdkk&)*r   c                    | D ch c]  \  }}|	 }}}i }| D ]  \  }}|j                         D ]  \  }}t        t        |      t              rd}nTt        t        |      t        t
        t        f      rd}n-t        t        |      t              rd}n|ht        dz        |j                  ||      |k7  rt        d|z        |||<     t        |j                               }t        ||      S c c}}w )z
        Constructs an ARFF_Formatter instance with class labels and feature
        types determined from the given data. Handles boolean, numeric and
        string (note: not nominal) types.
        z{True, False}NUMERICSTRINGzUnsupported value type %rzInconsistent type for %s)items
issubclasstypeboolintrT   strrD   getsortedru   )r   toklabelrW   r   fnamefvalftypes           r   rv   zARFF_Formatter.from_train  s     -33FLS%%F3  JC"yy{td4j$/+ET
S%,>?%ET
C0$E\$%@5%HII<<u-6$%?%%GHH"'  + !  (..*+fh//- 4s   C6c                     ddt        j                         z  z   }|dz  }| j                  D ]  \  }}|d|dd|dz  } |dddd	d
j                  | j                        dz  }|S )z#Returns an ARFF header as a string.z3% Weka ARFF file
% Generated automatically by NLTK
z%% %s

z@RELATION rel

z@ATTRIBUTE 30 r>   z-label-z {,z}
)timectimer   r   r   )r,   rX   r   r   s       r   r   zARFF_Formatter.header_section9  sr    4DJJL() 	
 	
   !NNLE5E599A + 	
)SXXdll5KLLr   Nc           	      .   ||xr t        |d   t        t        f      }|s|D cg c]  }|df }}d}|D ]V  \  }}| j                  D ]+  \  }}|d| j	                  |j                  |            z  z  }- |d| j	                  |      z  z  }X |S c c}w )a  
        Returns the ARFF data section for the given data.

        :param tokens: a list of featuresets (dicts) or labelled featuresets
            which are tuples (featureset, label).
        :param labeled: Indicates whether the given tokens are labeled
            or not.  If None, then the tokens will be assumed to be
            labeled if the first token's value is a tuple or list.
        Nr   z
@DATA
z%s,z%s
)
isinstancetuplery   r   _fmt_arff_valr   )r,   r   labeledr   rX   r   r   r   s           r   r   zARFF_Formatter.data_sectionN  s     ?EF1It}!EG-34VcsDkVF4  JC $uUT//??? !/$,,U333A !
  5s   Bc                 r    |yt        |t        t        f      rd|z  S t        |t              rd|z  S d|z  S )N?z%sz%r)r   r   r   rT   )r,   r   s     r   r   zARFF_Formatter._fmt_arff_valh  s@    <tSk*$;e$$;$;r   r)   )r   r   r   __doc__r/   r   rW   rA   staticmethodrv   r   r   r   r   r   r   ru   ru      s?    	"A" 0 0>*4r   ru   __main__)binary_names_demo_features
names_democ                 0    t         j                  d| d      S )Nz/tmp/name.modelro   )r'   r   )r6   s    r   make_classifierr   v  s    ##$5{FKKr   r)   )r   r   rR   rB   r?   r   r   sysr   nltk.classify.apir   nltk.internalsr   r   nltk.probabilityr   r   r   r   r   r'   ru   r   nltk.classify.utilr   r   r   r{   r   r   r   <module>r      s    
 	      ) , /
B `[ `Fz zz zIL O-GHJ r   