
    g5%                        d Z ddlZddlmZ ddlmZmZmZmZ ddl	m
Z
 ddlmZ  ej                  d      Z ej                  d      Z ej                  d	      Z ej                  d
ej"                        Z ej                  dej"                        Z ej                  dej"                        Z ej                  d      Z G d d      Z G d d      Zd Zd Zd Zd Zd ZddZddZ e
d      d        Z ed      Z y)z
CCG Lexicons
    N)defaultdict)CCGVar	DirectionFunctionalCategoryPrimitiveCategory)
deprecated)
Expressionz([A-Za-z]+)(\[[A-Za-z,]+\])?z"([A-Za-z]+(?:\[[A-Za-z,]+\])?)(.*)z([\\/])([.,]?)([.,]?)(.*)z([\S_]+)\s*(::|[-=]+>)\s*(.+)z([^{}]*[^ {}])\s*(\{[^}]+\})?z\{([^}]+)\}z([^#]*)(?:#.*)?c                   0    e Zd ZdZddZd Zd Zd Zd Zy)	Tokenz
    Class representing a token.

    token => category {semantics}
    e.g. eat => S\var[pl]/var {\x y.eat(x,y)}

    * `token` (string)
    * `categ` (string)
    * `semantics` (Expression)
    Nc                 .    || _         || _        || _        y N)_token_categ
_semantics)selftokencateg	semanticss       E/var/www/openai/venv/lib/python3.12/site-packages/nltk/ccg/lexicon.py__init__zToken.__init__9   s    #    c                     | j                   S r   )r   r   s    r   r   zToken.categ>   s    {{r   c                     | j                   S r   )r   r   s    r   r   zToken.semanticsA   s    r   c                     d}| j                   dt        | j                         z   dz   }dt        | j                        z   |z   S )N z {})r   strr   )r   semantics_strs     r   __str__zToken.__str__D   sB    ??& 3t#77#=MC$$}44r   c                     t        |t              syt        | j                  | j                  f|j                         |j                               S )N)
isinstancer   cmpr   r   r   r   )r   others     r   __cmp__zToken.__cmp__J   s8    %'DKK15;;=%//BSTTr   r   )	__name__
__module____qualname____doc__r   r   r   r    r&    r   r   r   r   -   s!    	$
5Ur   r   c                   (    e Zd ZdZd Zd Zd Zd Zy)
CCGLexiconz
    Class representing a lexicon for CCG grammars.

    * `primitives`: The list of primitive categories for the lexicon
    * `families`: Families of categories
    * `entries`: A mapping of words to possible categories
    c                 N    t        |      | _        || _        || _        || _        y r   )r   _start_primitives	_families_entries)r   start
primitivesfamiliesentriess        r   r   zCCGLexicon.__init__Y   s$    '.%!r   c                      | j                   |   S )z@
        Returns all the possible categories for a word
        )r2   )r   words     r   
categorieszCCGLexicon.categories_   s     }}T""r   c                     | j                   S )z;
        Return the target category for the parser
        )r/   r   s    r   r3   zCCGLexicon.starte   s     {{r   c                     d}d}t        | j                        D ]9  }|s|dz   }||z   dz   }d}| j                  |   D ]  }|s|dz   }nd}|d|z  z   } ; |S )zK
        String representation of the lexicon. Used for debugging.
        r   T
z => z | Fz%s)sortedr2   )r   stringfirstidentcats        r   r    zCCGLexicon.__str__k   sy     DMM*E$e^f,FE}}U+#e^F!E$*, , + r   N)r'   r(   r)   r*   r   r9   r3   r    r+   r   r   r-   r-   P   s     #r   r-   c                 &   | dd }d}|dk7  rZ|j                  d      sI|j                  d      rt        |      \  }}||z   }n||d   z   }|dd }|dk7  r|j                  d      sI|j                  d      r
|dz   |dd fS t        d| z   dz         )	zb
    Separate the contents matching the first set of brackets from the rest of
    the input.
       N(r   )r   zUnmatched bracket in string '')
startswithmatchBracketsAssertionError)r>   restinsideparts       r   rH   rH      s    
 !":DF
"*T__S1??3(.LT4d]Fd1g%F8D "*T__S1 sd12h''
86ACG
HHr   c                     | j                  d      rt        |       S t        j                  |       j	                         S )zb
    Separate the string for the next portion of the category from the rest
    of the string
    rD   )rG   rH   NEXTPRIM_REmatchgroups)r>   s    r   nextCategoryrQ      s7    
 V$$V$++--r   c                 &    t        | d   | dd       S )z'
    Parse an application operator
    r   rC   N)r   )apps    r   parseApplicationrT      s     SVSW%%r   c                 2    | r| dd j                  d      S g S )z7
    Parse the subscripts for a primitive category
    rC   r"   ,)split)subscrs    r   parseSubscriptsrY      s#     a|!!#&&Ir   c                     | d   dk(  r| d   |
t               }||fS | d   }||v r(||   \  }}||}||fS |j                  ||fg      }||fS ||v rt        | d         }t        ||      |fS t	        d|z   dz         )z
    Parse a primitive category

    If the primitive is the special category 'var', replace it with the
    correct `CCGVar`.
    r   varrC   zString 'z-' is neither a family nor primitive category.)r   
substituterY   r   rI   )chunksr4   r5   r[   catstrrA   cvarsubscrss           r   parsePrimitiveCategoryra      s     ayE!9{h:AYFv&d;C Sz ..4+/CSz!&),!&'2C88
VMM r   c                 >   t        |       \  }}|j                  d      rt        |dd |||      \  }}n2t        t        j                  |      j                         |||      \  }}|dk7  rt        j                  |      j                         }t        |dd       }|d   }t        |      \  }}|j                  d      rt        |dd |||      \  }	}n2t        t        j                  |      j                         |||      \  }	}t        ||	|      }|dk7  r||fS )z{
    Parse a string representing a category, and returns a tuple with
    (possibly) the CCG variable for the category
    rD   rC   r"   r   r      )
rQ   rG   augParseCategoryra   PRIM_RErO   rP   APP_RErT   r   )
liner4   r5   r[   
cat_stringrJ   resrS   	directionargs
             r   rd   rd      s'   
 &d+ZS!%j2&6
HcR
c ,MM*%,,.
Hc

c "*ll4 '')$S1X.	1v)$/T  %)*Qr*:JRUVJS#/j)002J#JS# !c95 "* :r   c                 t   t        j                          g }i }t        t              }| j	                         D ]h  }t
        j                  |      j                         d   j                         }|dk(  r>|j                  d      rB||dd j                         j                  d      D cg c]  }|j                          c}z   }t        j                  |      j                         \  }}}	t        j                  |	      j                         \  }
}t        |
||      \  }}|dk(  r||f||<   d}|du rI|t        |d	z         t        j                   t"        j                  |      j                         d         }||   j%                  t'        |||             k t)        |d   |||      S c c}w )
z@
    Convert string representation into a lexicon for CCGs.
    r   r   z:-   NrV   z::Tz@ must contain semantics because include_semantics is set to True)r   reset_idr   list
splitlinesCOMMENTS_RErO   rP   striprG   rW   LEX_RERHS_RErd   rI   r	   
fromstringSEMANTICS_REappendr   r-   )lex_strinclude_semanticsr4   r5   r6   rg   primr@   seprhsr^   r   rA   r[   r   s                  r   ru   ru      s    OOJH$G""$  &--/288:2:??4  $)-ab)9)?)?)D')D

)D' J
 !'T 2 9 9 ;UC&,ll3&7&>&>&@#V])&*hGJS#d{ $'* 	$,$,, `a 
 %/$9$9(..}=DDFqI%	
 %%eE3	&BCK %L jmZ7CC9's   ,F5zUse fromstring() instead.c                     t        |       S r   )ru   )rx   s    r   parseLexiconr~   "  s    gr   a  
    # Rather minimal lexicon based on the openccg `tinytiny' grammar.
    # Only incorporates a subset of the morphological subcategories, however.
    :- S,NP,N                    # Primitive categories
    Det :: NP/N                  # Determiners
    Pro :: NP
    IntransVsg :: S\NP[sg]    # Tensed intransitive verbs (singular)
    IntransVpl :: S\NP[pl]    # Plural
    TransVsg :: S\NP[sg]/NP   # Tensed transitive verbs (singular)
    TransVpl :: S\NP[pl]/NP   # Plural

    the => NP[sg]/N[sg]
    the => NP[pl]/N[pl]

    I => Pro
    me => Pro
    we => Pro
    us => Pro

    book => N[sg]
    books => N[pl]

    peach => N[sg]
    peaches => N[pl]

    policeman => N[sg]
    policemen => N[pl]

    boy => N[sg]
    boys => N[pl]

    sleep => IntransVsg
    sleep => IntransVpl

    eat => IntransVpl
    eat => TransVpl
    eats => IntransVsg
    eats => TransVsg

    see => TransVpl
    sees => TransVsg
    r   )F)!r*   recollectionsr   nltk.ccg.apir   r   r   r   nltk.internalsr   nltk.sem.logicr	   compilere   rN   rf   UNICODErs   rt   rv   rq   r   r-   rH   rQ   rT   rY   ra   rd   ru   r~   openccg_tinytinyr+   r   r   <module>r      s    
 # Q Q % % "**8
9 bjjBC 
4	5 
8"**	E 
8"**	E rzz,bjj9 bjj./ U  UF- -jI(.&<@.Db '( ) )+ r   