
    g(                        d Z ddlZddlZddl ddlmZ ddlmZ ddl ddl	m
Z
  G d de      Z G d	 d
e      Z G d de      Zi dddddddddddddddddddd d!d"d#d$d%d&d'd(d)d*d+d,d-d.i d/d0d1d2d3d4d5d6d7d8d9d:d;d<d=d>d?d@dAdBdCdDdEdFdGdHdIdJdKdLdMdNdOdNi dPdQdRdSdTdUdVdWdXdYdZd[d\d]d^d_d`dadbdcdddedfdgdhdidjdkdldmdndodpdqi drdsdtdudvdwdxdydzd{d|d}d~dddddddddddddddddddddi ddddddddddddddddddddddddddddddddddddddddddddddddddŜZy)a  
Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
English Prose (YCOE), a 1.5 million word syntactically-annotated
corpus of Old English prose texts. The corpus is distributed by the
Oxford Text Archive: http://www.ota.ahds.ac.uk/ It is not included
with NLTK.

The YCOE corpus is divided into 100 files, each representing
an Old English prose text. Tags used within each text complies
to the YCOE standard: https://www-users.york.ac.uk/~lang22/YCOE/YcoeHome.htm
    N)*)BracketParseCorpusReader)TaggedCorpusReader)RegexpTokenizerc                   f    e Zd ZdZddZddZddZd ZddZddZ	dd	Z
dd
ZddZddZddZy)YCOECorpusReaderz
    Corpus reader for the York-Toronto-Helsinki Parsed Corpus of Old
    English Prose (YCOE), a 1.5 million word syntactically-annotated
    corpus of Old English prose texts.
    c                    t         j                  | |g |       t        | j                  j	                  d      dd|      | _        t        | j                  j	                  d      dd      | _        | j
                  j                         D ch c]  }|d d 	 }}| j                  j                         D ch c]  }|d d 	 c}|k7  rt        d      t        |D cg c]  }d	|z  	 c}|D cg c]  }d
|z  	 c}z         }t         j                  | |||       t        |      | _        y c c}w c c}w c c}w c c}w )Npsdz.*.psd)encodingpos.posz5Items in "psd" and "pos" subdirectories do not match.%s.psd%s.pos)CorpusReader__init__YCOEParseCorpusReaderrootjoin_psd_readerYCOETaggedCorpusReader_pos_readerfileids
ValueErrorsorted
_documents)selfr   r   f	documentsdocr   s          L/var/www/openai/venv/lib/python3.12/site-packages/nltk/corpus/reader/ycoe.pyr   zYCOECorpusReader.__init__%   s.   dD"h70IINN5!4(
 2$))..2GvV &*%5%5%=%=%?@%?QsV%?	@ ,,44676qAcrF679DWXX'01yX^y1)23#x#~34
 	dD'8< + A7 23s   D-7D2D7/D<
Nc                     || j                   S t        |t              r|g}|D ]  }|| j                  vst	        d|z         t        |D ch c]  }|dd 	 c}      S c c}w )z
        Return a list of document identifiers for all documents in
        this corpus, or for the documents with the given file(s) if
        specified.
        NzFile id %s not foundr   )r   
isinstancestr_fileidsKeyErrorr   )r   r   r   s      r"   r    zYCOECorpusReader.documents9   sp     ???"gs#iGA%5?@@  w/w!q"vw/00/s   A!c           
          || j                   S t        |t              r|g}t        t	        |D cg c]  }d|z  	 c}|D cg c]  }d|z  	 c}z               S c c}w c c}w )z
        Return a list of file identifiers for the files that make up
        this corpus, or that store the given document(s) if specified.
        r   r   )r&   r$   r%   r   set)r   r    r!   s      r"   r   zYCOECorpusReader.fileidsI   sq    
 == 	3'"I+459CC95-67Yc8c>Y78
 	
57s   AA c                     || j                   }nHt        |t              r|g}|D ]0  }|| j                   vs|dd dv rt        d      t        d|z         |D cg c]	  }| d|  c}S c c}w )z
        Helper that selects the appropriate fileids for a given set of
        documents from a given subcorpus (pos or psd).
        Nr   )r   r   zvExpected a document identifier, not a file identifier.  (Use corpus.documents() to get a list of document identifiers.z Document identifier %s not found.)r   r$   r%   r   )r   r    	subcorpusdocumentds        r"   _getfileidszYCOECorpusReader._getfileidsY   s    
 I)S)&K	%4??2}(88(>  ))Kh)VWW & -66Iq1#Qyk"I666s   A-c                 X    | j                   j                  | j                  |d            S Nr   )r   wordsr/   r   r    s     r"   r2   zYCOECorpusReader.wordsp   &    %%d&6&6y%&HII    c                 X    | j                   j                  | j                  |d            S r1   )r   sentsr/   r3   s     r"   r7   zYCOECorpusReader.sentss   r4   r5   c                 X    | j                   j                  | j                  |d            S r1   )r   parasr/   r3   s     r"   r9   zYCOECorpusReader.parasv   r4   r5   c                 X    | j                   j                  | j                  |d            S r1   )r   tagged_wordsr/   r3   s     r"   r;   zYCOECorpusReader.tagged_wordsy   &    ,,T-=-=i-OPPr5   c                 X    | j                   j                  | j                  |d            S r1   )r   tagged_sentsr/   r3   s     r"   r>   zYCOECorpusReader.tagged_sents|   r<   r5   c                 X    | j                   j                  | j                  |d            S r1   )r   tagged_parasr/   r3   s     r"   r@   zYCOECorpusReader.tagged_paras   r<   r5   c                 X    | j                   j                  | j                  |d            S )Nr
   )r   parsed_sentsr/   r3   s     r"   rB   zYCOECorpusReader.parsed_sents   r<   r5   utf8)N)__name__
__module____qualname____doc__r   r    r   r/   r2   r7   r9   r;   r>   r@   rB    r5   r"   r   r      sE    ,(1 
 7.JJJQQQQr5   r   c                       e Zd ZdZd Zy)r   zrSpecialized version of the standard bracket parse corpus reader
    that strips out (CODE ...) and (ID ...) nodes.c                     t        j                  dd|      }t        j                  d|      ry t        j                  | |      S )Nz(?u)\((CODE|ID)[^\)]*\) z\s*\(\s*\)\s*$)resubmatchr   _parse)r   ts     r"   rP   zYCOEParseCorpusReader._parse   s:    FF-r1588%q)'..tQ77r5   N)rE   rF   rG   rH   rP   rI   r5   r"   r   r      s    68r5   r   c                       e Zd ZddZy)r   c                 V    d}t        |d      }t        j                  | ||d|       y )Nz+(?u)(?<=/\.)\s+|\s*\S*_CODE\s*|\s*\S*_ID\s*T)gaps_)sepsent_tokenizer)r   r   r   )r   r   itemsr   gaps_rerW   s         r"   r   zYCOETaggedCorpusReader.__init__   s,    @(t<##$3~	
r5   NrC   )rE   rF   rG   r   rI   r5   r"   r   r      s    
r5   r   zcoadrian.o34zAdrian and Ritheuszcoaelhom.o3u   Ælfric, Supplemental Homilieszcoaelive.o3u   Ælfric's Lives of SaintscoalcuinzAlcuin De virtutibus et vitiisz
coalex.o23zAlexander's Letter to Aristotlezcoapollo.o3zApollonius of Tyrecoaugust	Augustinez	cobede.o2z$Bede's History of the English Churchzcobenrul.o3zBenedictine Rulezcoblick.o23zBlickling Homiliesz
coboeth.o2z#Boethius' Consolation of Philosophyzcobyrhtf.o3zByrhtferth's Manual	cocanedgDzCanons of Edgar (D)	cocanedgXzCanons of Edgar (X)zcocathom1.o3u   Ælfric's Catholic Homilies Izcocathom2.o3u   Ælfric's Catholic Homilies IIz
cochad.o24z
Saint ChadcochdrulzChrodegang of Metz, RulecochristophzSaint ChristopherzcochronA.o23zAnglo-Saxon Chronicle AcochronCzAnglo-Saxon Chronicle CcochronDzAnglo-Saxon Chronicle DzcochronE.o34zAnglo-Saxon Chronicle Ez	cocura.o2zCura PastoraliscocuraCzCura Pastoralis (Cotton)zcodicts.o34zDicts of Catoz
codocu1.o1zDocuments 1 (O1)zcodocu2.o12zDocuments 2 (O1/O2)z
codocu2.o2zDocuments 2 (O2)zcodocu3.o23zDocuments 3 (O2/O3)z
codocu3.o3zDocuments 3 (O3)zcodocu4.o24zDocuments 4 (O2/O4)coeluc1z Honorius of Autun, Elucidarium 1coeluc2zcoepigen.o3u   Ælfric's Epilogue to GenesiscoeuphrzSaint Euphrosynecoeustz Saint Eustace and his companions	coexodusPz
Exodus (P)	cogenesiCzGenesis (C)zcogregdC.o24zGregory's Dialogues (C)zcogregdH.o23zGregory's Dialogues (H)coherbarzPseudo-Apuleius, HerbariumzcoinspolD.o34z"Wulfstan's Institute of Polity (D)	coinspolXz"Wulfstan's Institute of Polity (X)cojameszSaint Jameszcolacnu.o23Lacnungaz
colaece.o2	Leechdomszcolaw1cn.o3zLaws, Cnut Izcolaw2cn.o3zLaws, Cnut IIzcolaw5atr.o3u   Laws, Æthelred Vzcolaw6atr.o3u   Laws, Æthelred VIz
colawaf.o2zLaws, Alfredzcolawafint.o2zAlfred's Introduction to Lawszcolawger.o34zLaws, Gerefazcolawine.ox2z	Laws, Inezcolawnorthu.o3zNorthumbra Preosta Laguzcolawwllad.o4zLaws, William I, Ladzcoleofri.o4Leofriczcolsigef.o3u   Ælfric's Letter to Sigefyrth	colsigewBu!   Ælfric's Letter to Sigeweard (B)zcolsigewZ.o34u!   Ælfric's Letter to Sigeweard (Z)colwgeatu   Ælfric's Letter to Wulfgeat	colwsigeTu    Ælfric's Letter to Wulfsige (T)zcolwsigeXa.o34u!   Ælfric's Letter to Wulfsige (Xa)zcolwstan1.o3u   Ælfric's Letter to Wulfstan Izcolwstan2.o3u   Ælfric's Letter to Wulfstan IIzcomargaC.o34zSaint Margaret (C)comargaTzSaint Margaret (T)comart1zMartyrology, Icomart2zMartyrology, IIzcomart3.o23zMartyrology, IIIzcomarvel.o23zMarvels of the EastcomaryzMary of Egyptconeotz
Saint NeotconicodAzGospel of Nicodemus (A)conicodCzGospel of Nicodemus (C)conicodDzGospel of Nicodemus (D)conicodEzGospel of Nicodemus (E)zcoorosiu.o2Orosiusz
cootest.o3
Heptateuchzcoprefcath1.o3u(   Ælfric's Preface to Catholic Homilies Izcoprefcath2.o3u)   Ælfric's Preface to Catholic Homilies IIzcoprefcura.o2zPreface to the Cura Pastoraliszcoprefgen.o3u   Ælfric's Preface to Genesiszcopreflives.o3u$   Ælfric's Preface to Lives of Saintsz"Preface to Augustine's Soliloquiesz*Pseudo-Apuleius, Medicina de quadrupedibuszHistory of the Holy Rood-TreezSeven SleeperszSt. Augustine's SoliloquieszSolomon and Saturn IzSolomon and Saturn IIu   Ælfric's De Temporibus AnnizVercelli HomilieszVercelli Homilies (E)zVercelli Homilies (L)zSaint Vincent (Bodley 343)zVindicta SalvatoriszWest-Saxon GospelszWulfstan's Homilies)coprefsolilozcoquadru.o23corood	cosevenslcosolilozcosolsat1.o4	cosolsat2z
cotempo.o3coverhom	coverhomE	coverhomLcovinceBcovinsalzcowsgosp.o3z
cowulf.o34)rH   osrM   nltk.corpus.reader.api nltk.corpus.reader.bracket_parser   nltk.corpus.reader.taggedr   nltk.corpus.reader.utilnltk.tokenizer   r   r   r   r   r    rI   r5   r"   <module>r      s  
 
 	 $ E 8 % )eQ| eQP84 8
/ 
e(e3e .e 0	e
 3e 'e e 7e %e 'e 7e (e &e &e 3e  4!e" ,#e$ *%e& &'e( -)e* )+e, )-e. -/e0 "1e2 )3e4 ?5e6 $7e8 (9e: $;e< (=e> $?e@ (AeB 1CeD 1EeF 2GeH !IeJ 0KeL MeN OeP -QeR -SeT ,UeV 9WeX 5YeZ }[e\ :]e^ +_e` >aeb ?ced 'eef (geh .iej 4kel Nmen Koep /qer +set 9uev 2wex 4yez 8{e| .}e~ 3e@ 9AeB 4CeD 5EeF (GeH $IeJ KeL  MeN %OeP )QeR oSeT lUeV )WeX )YeZ )[e\ )]e^ 9_e` ,aeb @ced Aeef 5geh 2iej <kel 9@-!-*(0#((,%''Ie	r5   