
    gF                     \    d dl Z d dl d dl d dlmZmZ  G d de      Z G d de      Z	y)    N)*)map_tag	str2tuplec                       e Zd ZdZd Zd Zy)SwitchboardTurnaE  
    A specialized list object used to encode switchboard utterances.
    The elements of the list are the words in the utterance; and two
    attributes, ``speaker`` and ``id``, are provided to retrieve the
    spearker identifier and utterance id.  Note that utterance ids
    are only unique within a given discourse.
    c                 ^    t         j                  | |       || _        t        |      | _        y N)list__init__speakerintid)selfwordsr   r   s       S/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/switchboard.pyr   zSwitchboardTurn.__init__   s"    dE"b'    c                     t        |       dk(  rd}n=t        | d   t              rdj                  d | D              }ndj                  |       }d| j                   d| j
                   d|dS )	Nr     c              3   &   K   | ]	  }d |z    yw)z%s/%sN ).0ws     r   	<genexpr>z+SwitchboardTurn.__repr__.<locals>.<genexpr>    s     6AGaKs   <.z: >)len
isinstancetuplejoinr   r   )r   texts     r   __repr__zSwitchboardTurn.__repr__   sd    t9>DQ'88666D88D>D4<<.$''"THA66r   N)__name__
__module____qualname____doc__r   r#   r   r   r   r   r      s    
7r   r   c                       e Zd ZdgZddZd ZddZd ZddZd Z	dd	Z
d
 ZddZd ZddZd ZddZ ej$                  d      ZdZddZy)SwitchboardCorpusReadertaggedNc                 T    t         j                  | || j                         || _        y r	   )CorpusReaderr   _FILES_tagset)r   roottagsets      r   r   z SwitchboardCorpusReader.__init__+   s    dD$++6r   c                 L    t        | j                  d      | j                        S Nr*   )StreamBackedCorpusViewabspath_words_block_readerr   s    r   r   zSwitchboardCorpusReader.words/       %dll8&<d>V>VWWr   c                 H      fd}t         j                  d      |      S )Nc                 (    j                  |       S r	   )_tagged_words_block_readerstreamr   r0   s    r   tagged_words_block_readerzGSwitchboardCorpusReader.tagged_words.<locals>.tagged_words_block_reader3       2266BBr   r*   r3   r4   )r   r0   r=   s   `` r   tagged_wordsz$SwitchboardCorpusReader.tagged_words2   "    	C &dll8&<>WXXr   c                 L    t        | j                  d      | j                        S r2   )r3   r4   _turns_block_readerr6   s    r   turnszSwitchboardCorpusReader.turns8   r7   r   c                 H      fd}t         j                  d      |      S )Nc                 (    j                  |       S r	   )_tagged_turns_block_readerr;   s    r   tagged_turns_block_readerzGSwitchboardCorpusReader.tagged_turns.<locals>.tagged_turns_block_reader<   r>   r   r*   r?   )r   r0   rH   s   `` r   tagged_turnsz$SwitchboardCorpusReader.tagged_turns;   rA   r   c                 L    t        | j                  d      | j                        S r2   )r3   r4   _discourses_block_readerr6   s    r   
discoursesz"SwitchboardCorpusReader.discoursesA   s#    %LL"D$A$A
 	
r   c                 H      fd}t         j                  d      |      S )Nc                 (    j                  |       S r	   _tagged_discourses_block_readerr;   s    r   tagged_discourses_block_readerzQSwitchboardCorpusReader.tagged_discourses.<locals>.tagged_discourses_block_readerG   s    77GGr   r*   r?   )r   r0   rQ   s   `` r   tagged_discoursesz)SwitchboardCorpusReader.tagged_discoursesF   s'    	H &LL"$B
 	
r   c           	          t        |      D cg c];  }|j                  d      D ]%  }|j                         r| j                  |d      ' = c}}gS c c}}w )N
F)include_tagread_blankline_blocksplitstrip_parse_utterance)r   r<   bus       r   rK   z0SwitchboardCorpusReader._discourses_block_readerN   sa    
 .f55AA779 %%aU%;& <5
 	
s   A Ac           
          t        |      D cg c]<  }|j                  d      D ]&  }|j                         r| j                  |d|      ( > c}}gS c c}}w )NrT   T)rU   r0   rV   )r   r<   r0   r[   r\   s        r   rP   z7SwitchboardCorpusReader._tagged_discourses_block_readerY   sd    
 .f55AA779 %%aT&%I& J5
 	
s   AAc                 *    | j                  |      d   S Nr   )rK   r   r<   s     r   rC   z+SwitchboardCorpusReader._turns_block_readerd   s    ,,V4Q77r   c                 ,    | j                  ||      d   S r_   rO   r   r<   r0   s      r   rG   z2SwitchboardCorpusReader._tagged_turns_block_readerg   s    33FFCAFFr   c                 >    t        | j                  |      d   g       S r_   )sumrK   r`   s     r   r5   z+SwitchboardCorpusReader._words_block_readerj   s    4008;R@@r   c                 @    t        | j                  ||      d   g       S r_   )rd   rP   rb   s      r   r:   z2SwitchboardCorpusReader._tagged_words_block_readerm   s!    477GJBOOr   z(\w+)\.(\d+)\:\s*(.*)/c           
         | j                   j                  |      }|t        d|z        |j                         \  }}}|j	                         D cg c]  }t        || j                         }	}|s|	D 
cg c]  \  }
}|
	 }	}
}n<|r:|| j                  k7  r+|	D 
cg c]  \  }
}|
t        | j                  ||      f  }	}
}t        |	||      S c c}w c c}}
w c c}}
w )NzBad utterance %r)
_UTTERANCE_REmatch
ValueErrorgroupsrX   r   _SEPr.   r   r   )r   	utterancerU   r0   mr   r   r"   sr   r   ts               r   rZ   z(SwitchboardCorpusReader._parse_utterances   s    $$Y/9/);<<HHJT26**,?,Q1dii(,?%*+U6AqQUE+$,,.INOv1avq9:EOugr22 @+Os   C8C#Cr	   )F)r$   r%   r&   r-   r   r   r@   rD   rI   rL   rR   rK   rP   rC   rG   r5   r:   recompilerh   rl   rZ   r   r   r   r)   r)   &   sn    ZFXYXY


	
	
8GAP BJJ78MD
3r   r)   )
rq   nltk.corpus.reader.apinltk.corpus.reader.utilnltk.tagr   r   r
   r   r,   r)   r   r   r   <module>rv      s-    
 $ % '7d 70W3l W3r   