
    gG                     f    d dl Z d dlmZ d dlmZ d dlmZ  G d de      Zd Ze	dk(  r e        yy)	    N)defaultdict)reduce)CorpusReaderc                   v     e Zd ZdZ ej
                  d      Zed        Zd	 fd	Z	d
dZ
d
dZd
dZd Z xZS )LinThesaurusCorpusReaderzEWrapper for the LISP-formatted thesauruses distributed by Dekang Lin.z \("?([^"]+)"? \(desc [0-9.]+\).+c                       t        t              S )z6Factory for creating defaultdict of defaultdict(dict)s)r   dict     K/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/lin.py__defaultdict_factoryz.LinThesaurusCorpusReader.__defaultdict_factory   s     4  r   c                    t         |   |d       t        t        j                        | _        || _        | j                  dd      D ]  \  }}}t        |      5 }d}|D ]  }|j                         }|r#t        j                  j                  d|      }	d}8|dk(  rd}@|j                  d      }
t        |
      dk(  s`|
\  }}t        |      | j
                  |   	   |j                  d	      <    	 d
d
d
        y
# 1 sw Y   xY w)a  
        Initialize the thesaurus.

        :param root: root directory containing thesaurus LISP files
        :type root: C{string}
        :param badscore: the score to give to words which do not appear in each other's sets of synonyms
        :type badscore: C{float}
        zsim[A-Z]\.lspT)include_encodinginclude_fileidz\1Fz))	   "N)super__init__r   r   ._LinThesaurusCorpusReader__defaultdict_factory
_thesaurus	_badscoreabspathsopenstrip_key_resubsplitlenfloat)selfrootbadscorepathencodingfileidlin_filefirstlinekey
split_linengramscore	__class__s                r   r   z!LinThesaurusCorpusReader.__init__   s    	/0%&>&T&TU!&*mm!$ '4 '
"D(F dx$D::<D6>>BB5$O % $ &*ZZ%5
z?a/+5LE5MR %NDOOF3C8S9IJ % '
 s   A$D4DD
	c                 j   ||k(  r|ry| j                   D cg c]  }|df c}S |r5|| j                  |   |   v r| j                  |   |   |   S | j                  S | j                   D cg c]9  }||| j                  |   |   v r| j                  |   |   |   n| j                  f; c}S c c}w c c}w )a  
        Returns the similarity score for two ngrams.

        :param ngram1: first ngram to compare
        :type ngram1: C{string}
        :param ngram2: second ngram to compare
        :type ngram2: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, just the score for the two ngrams; otherwise,
                 list of tuples of fileids and scores.
        g      ?)_fileidsr   r   )r!   ngram1ngram2r&   fids        r   
similarityz#LinThesaurusCorpusReader.similarity?   s     V.2mm<msc
m<< !8!@@ OOF+F3F;   $}}
  -   &)=f)EE !OOC08@!%  -
 
 =
s   B+*>B0c                     |r | j                   |   |   j                         S | j                  D cg c]$  }|| j                   |   |   j                         f& c}S c c}w )a   
        Returns a list of scored synonyms (tuples of synonyms and scores) for the current ngram

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of tuples of scores and synonyms; otherwise,
                 list of tuples of fileids and lists, where inner lists consist of tuples of
                 scores and synonyms.
        )r   itemsr0   r!   r,   r&   s      r   scored_synonymsz(LinThesaurusCorpusReader.scored_synonymsf   sk     ??6*517799 #mm+F 07==?@+     )Ac                     |r | j                   |   |   j                         S | j                  D cg c]$  }|| j                   |   |   j                         f& c}S c c}w )a  
        Returns a list of synonyms for the current ngram.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :param fileid: thesaurus fileid to search in. If None, search all fileids.
        :type fileid: C{string}
        :return: If fileid is specified, list of synonyms; otherwise, list of tuples of fileids and
                 lists, where inner lists contain synonyms.
        )r   keysr0   r7   s      r   synonymsz!LinThesaurusCorpusReader.synonymsz   sk     ??6*516688 #mm+F 07<<>?+  r9   c                 <     t         fd j                  d      S )z
        Determines whether or not the given ngram is in the thesaurus.

        :param ngram: ngram to lookup
        :type ngram: C{string}
        :return: whether the given ngram is in the thesaurus.
        c                 .    | xs j                   |   v S N)r   )accumr&   r,   r!   s     r   <lambda>z7LinThesaurusCorpusReader.__contains__.<locals>.<lambda>   s    %"MET__V5L,L"Mr   F)r   r0   )r!   r,   s   ``r   __contains__z%LinThesaurusCorpusReader.__contains__   s     MMM
 	
r   )g        r?   )__name__
__module____qualname____doc__recompiler   staticmethodr   r   r4   r8   r<   rB   __classcell__)r.   s   @r   r   r      sF    O bjj<=G! !"H%N(&
r   r   c                     ddl m}  d}d}t        d|z          t        | j                  |             t        d|z          t        | j	                  |             t        d|z          t        | j                  |d	             t        d|z          t        | j                  |d	             t        d
| d| d       t        | j                  ||             y )Nr   )lin_thesaurusbusiness
enterprisezGetting synonyms for zGetting scored synonyms for z5Getting synonyms from simN.lsp (noun subsection) for zsimN.lsp)r&   zSimilarity score for z and :)nltk.corpusrL   printr<   r8   r4   )thesword1word2s      r   demorU      s    1EE	
!E
)*	$--
	
(5
01	$

u
%&	
AE
IJ	$--j-
12	
AE
IJ	$--j-
12	!%eWA
67	$//%
'(r   __main__)
rG   collectionsr   	functoolsr   nltk.corpus.readerr   r   rU   rC   r
   r   r   <module>rZ      s;    
 #  +J
| J
d)* zF r   