
    g&                     0    d dl Z d dlZddlmZmZmZ ddZy)    N   )	HTML2Text__version__configc            
         d}  G d d      }t        j                         }|j                  ddt        j                  d       |j                  dd	d
t        j
                  d       |j                  dddt        j                  d       |j                  ddd
t        j                  d       |j                  ddd
t        j                  d       |j                  ddd
t        j                  d       |j                  dddt        j                  d       |j                  ddd
t        j                  d       |j                  d d
d!t        j                  d"#       |j                  d$d%d
t        j                  d&       |j                  d'd(d
t        j                  d)       |j                  d*d+d
t        j                  d,       |j                  d-d.d
t        j                   d/       |j                  d0d1d
t        j"                  d2       |j                  d3d4d
d5d6d7#       |j                  d8d9d
d:d6d;#       |j                  d<d=d
d>d6d?#       |j                  d@dAdBt$        t        j&                  dCD       |j                  dEdFdGt$        t        j(                  dHD       |j                  dIdJd
dKd6dL#       |j                  dMd
dNd6dO#       |j                  dPd
dQt        j*                  dR#       |j                  dSd
dTt        j,                  dU#       |j                  dVd
dWt        j.                  dX#       |j                  dYd
dZt        j0                  d[#       |j                  d\dd]t        j2                  d^#       |j                  d_dd`t        j4                  da#       |j                  dbd
dct        j6                  dd#       |j                  ded
dft        j8                  dg#       |j                  dhdit        j:                  dj       |j                  dkdlt        j<                  dm       |j                  dndot        j>                  dp       |j                  dqdrdsjA                  tC        tD        tF                    t       |j                  dudvw       |j                  dxdvdyz       |j                  d{d|d
t        jH                  d}       |jK                         }|jL                  r?|jL                  d~k7  r0tO        |jL                  d      5 }|jQ                         }d d d        n(tR        jT                  jV                  jQ                         }	 jY                  |jZ                  |j\                        }ti        |       }	|jj                  rd~|	_6        |jn                  rd|	_8        d|	_9        |jt                  |	_:        |jv                  |	_<        |jz                  |	_=        |j|                  |	_>        |j~                  |	_?        |j                  |	_@        |j                  |	_A        |j                  |	_B        |j                  |	_C        |j                  |	_D        |j                  |	_E        |j                  |	_F        |j                  |	_G        |j                  |	_H        |j                  |	_I        |j                  |	_J        |j                  |	_K        |j                  |	_L        |j                  |	_M        |j                  |	_N        |j                  |	_O        |j                  |	_P        |j                  |	_Q        |j                  |	_R        |j                  |	_S        |j                  |	_T        |j                  |	_U        |j                  |	_V        |j                  |	_W        |j                  |	_X        tR        j                  j                  |	j                  |             y # 1 sw Y   xY w# t^        $ rU}|j`                  dz   |jb                  z   }|d|jd                  z   z  }|d|jb                  z   dz   z  }tg        |       |d }~ww xY w)N c                   ,    e Zd ZdZdZdZdZdZdZdZ	dZ
y	)
main.<locals>.bcolorsz[95mz[94mz[92mz[93mz[91mz[0mz[1mz[4mN)__name__
__module____qualname__HEADEROKBLUEOKGREENWARNINGFAILENDCBOLD	UNDERLINE     K/var/www/openai/venv/lib/python3.12/site-packages/crawl4ai/html2text/cli.pybcolorsr
   
   s*    	r   r   z--default-image-altdefault_image_altz3The default alt string for images with missing ones)destdefaulthelpz--pad-tables
pad_tables
store_truez-pad the cells to equal column width in tables)r   actionr   r   z--no-wrap-links
wrap_linksstore_falsez"don't wrap links during conversionz--wrap-list-itemswrap_list_itemsz!wrap list items during conversionz--wrap-tableswrap_tableszwrap tablesz--ignore-emphasisignore_emphasisz)don't include any formatting for emphasisz--reference-linksinline_linksz1use reference style links instead of inline linksz--ignore-linksignore_linksz&don't include any formatting for linksz--ignore-mailto-linksignore_mailto_linkszdon't include mailto: links)r    r   r   r   z--protect-linksprotect_linkszCprotect links from line breaks surrounding them with angle bracketsz--ignore-imagesignore_imagesz'don't include any formatting for imagesz--images-as-htmlimages_as_htmlzWAlways write image tags as raw html; preserves `height`, `width` and `alt` if possible.z--images-to-altimages_to_altz&Discard image data, only keep alt textz--images-with-sizeimages_with_sizezMWrite image tags with height and width attrs as raw html to retain dimensionsz-gz--google-doc
google_docFz(convert an html-exported Google Documentz-dz--dash-unordered-listul_style_dashz6use a dash rather than a star for unordered list itemsz-ez--asterisk-emphasisem_style_asteriskz=use an asterisk rather than an underscore for emphasized textz-bz--body-width
body_widthz3number of characters per output line, 0 for no wrap)r   typer   r   z-iz--google-list-indentlist_indentz,number of pixels Google indents nested listsz-sz--hide-strikethroughhide_strikethroughzDhide strike-through text. only relevant when -g is specified as wellz--escape-allescape_snobzbEscape all special characters.  Output is less readable, but avoids corner case formatting issues.z--bypass-tablesbypass_tablesz2Format tables in HTML rather than Markdown syntax.z--ignore-tablesignore_tableszAIgnore table-related tags (table, th, td, tr) while keeping rows.z--single-line-breaksingle_line_breakzhUse a single line break after a block element rather than two line breaks. NOTE: Requires --body-width=0z--unicode-snobunicode_snobzUse unicode throughout documentz--no-automatic-linksuse_automatic_linksz.Do not use automatic links wherever applicablez--no-skip-internal-linksskip_internal_linkszDo not skip internal linksz--links-after-paralinks_each_paragraphz2Put links after each paragraph instead of documentz--mark-code	mark_codez.Mark program code blocks with [code]...[/code]z--decode-errorsdecode_errorszZWhat to do in case of decode errors.'ignore', 'strict' and 'replace' are acceptable valuesz--open-quote
open_quotez!The character used to open quotesz--close-quoteclose_quotez"The character used to close quotesz	--versionversion.)r    rA   filename?)nargsencodingzutf-8)rE   r   z--include-sup-subinclude_sup_subzInclude the sup and sub tags-rbzWarning:z	 Use the z--decode-errors=ignorez flag.)baseurl*__)\argparseArgumentParseradd_argumentr   DEFAULT_IMAGE_ALT
PAD_TABLES
WRAP_LINKSWRAP_LIST_ITEMSWRAP_TABLESIGNORE_EMPHASISINLINE_LINKSIGNORE_ANCHORSIGNORE_MAILTO_LINKSPROTECT_LINKSIGNORE_IMAGESIMAGES_AS_HTMLIMAGES_TO_ALTIMAGES_WITH_SIZEint
BODY_WIDTHGOOGLE_LIST_INDENTBYPASS_TABLESIGNORE_TABLESSINGLE_LINE_BREAKUNICODE_SNOBUSE_AUTOMATIC_LINKSSKIP_INTERNAL_LINKSLINKS_EACH_PARAGRAPH	MARK_CODEDECODE_ERRORS
OPEN_QUOTECLOSE_QUOTEjoinmapstrr   INCLUDE_SUP_SUB
parse_argsrC   openreadsysstdinbufferdecoderF   r>   UnicodeDecodeErrorr   r   r   printr   r/   ul_item_markr0   emphasis_markstrong_markr1   r3   google_list_indentr%   r'   r(   r)   r*   r+   r,   r-   r.   r4   r5   r6   r7   r8   r&   r9   r:   r;   r<   r=   r!   r#   r$   r   r   r?   r@   rG   stdoutwritehandle)
rJ   r   pargsfpdatahtmlerrwarninghs
             r   mainr      s>   G  	!ANN ((B	   NN!!<   NN!!1   NN&&0   NN""   NN&&8   NN##@   NN%%5   NN"***   NN$$R   NN$$6   NN%%!  	 NN$$5   NN''  	 NN7   NNE   NN L   NN!!B   NN));   NN!V   NN-  	 NN$$A   NN$$S   NN ((4  	 NN##.   NN"**=   NN""**)   NN#++A   NN  =   NN$$    NN!!0	   NN""1	   NNIsxxC8M/N   NN:SN)NN:S'N:NN&&+   <<>D}}#-$--&"779D '& yy$$&{{4==$*<*<= 	'"A??AL++A,,A&&AN 44A((AO((AO**A((AO..A??AL22A$$AM((AO((AO00A&&AN&&AN 44A 44A!66A..AK??AL,,A$$AM??AL00A??AL$$AM,,AJJQXXd^$k '&  //J.=;00+gll:XEEg	s%   &_5)&` 5_?	a Aaa )returnN)rM   rs   r   r   r   r   r   r   r   r   <module>r      s     
 , ,C%r   