
    gؤ              	       8   d Z ddlZddlZddlZddlZddlmZ ddl	m
Z
 ddlmZmZmZmZmZ ddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZmZmZm Z m!Z!m"Z"m#Z#m$Z$m%Z% d
Z& G d dejN                  jP                        Z)dde*de*dee+   de*fdZ, G d de)      Z-y)z>html2text: Turn HTML into equivalent Markdown-structured text.    N)wrap)DictListOptionalTupleUnion   )config)OutCallback)AnchorElementListElement)dumb_css_parserelement_style	escape_mdescape_md_sectiongoogle_fixed_width_fontgoogle_has_heightgoogle_list_stylegoogle_text_emphasishnlist_numbering_startpad_tables_in_textskipwrapunifiable_n)i        c            	       
    e Zd Zddej                  fdee   dededdf fdZ	d Z
d	eddf fd
Zd	edefdZdeddfdZdefdZdeddfdZdeddfdZdedeeeee   f      ddfdZdeddfdZdeeee   f   dee   fdZdedeeef   deeef   ddfdZdedeeee   f   deddfdZd+dZd+dZd+dZ	 d,d	eded eeef   ddfd!Zd-d	ed"eddfd#Zd$edefd%Z dedefd&Z!d'eeef   defd(Z"d)edefd*Z# xZ$S ).	HTML2TextN outbaseurl	bodywidthreturnc                 4   t         |   d       d| _        d| _        d| _        t
        j                  | _        t
        j                  | _	        t
        j                  | _        t
        j                  | _        t
        j                  | _        t
        j                   | _        t
        j$                  | _        || _        t
        j*                  | _        t
        j.                  | _        t
        j2                  | _        t
        j6                  | _        t
        j:                  | _        t
        j>                  | _         t
        jB                  | _"        t
        jF                  | _$        t
        jJ                  | _&        t
        jN                  | _(        t
        jR                  | _*        t
        jV                  | _,        t
        jZ                  | _.        d| _/        d| _0        d| _1        d| _2        t
        jf                  | _4        t
        jj                  | _6        d| _7        t
        jp                  | _9        t
        jt                  | _;        t
        jx                  | _=        t
        j|                  | _?        t
        j                  | _A        t
        j                  | _C        d| _D        t
        j                  | _F        t
        j                  | _H        t
        j                  | _J        || j                  | _L        n|| _L        g | _M        d| _N        d| _O        d| _P        d| _Q        d| _R        g | _S        g | _T        d| _U        d| _V        t        j                  d	      | _Y        d| _Z        g | _[        d| _\        d| _]        d| _^        d| __        d| _`        d
| _a        d| _b        d| _c        d| _d        i | _e        g | _f        d| _g        d| _h        d| _i        d| _j        d| _k        i | _l        || _m        d| _n        d| _o        d
| _p        d
| _q        dt
        j                  d<   y)z
        Input parameters:
            out: possible custom replacement for self.outtextf (which
                 appends lines of text).
            baseurl: base URL of the document we process
        F)convert_charrefsr   *_z**NTz^[a-zA-Z+]+://r   &nbsp_place_holder;nbsp)ssuper__init__split_next_tdtd_counttable_startr
   UNICODE_SNOBunicode_snobESCAPE_SNOBescape_snobESCAPE_BACKSLASHescape_backslash
ESCAPE_DOT
escape_dotESCAPE_PLUSescape_plusESCAPE_DASHescape_dashLINKS_EACH_PARAGRAPHlinks_each_paragraph
body_widthSKIP_INTERNAL_LINKSskip_internal_linksINLINE_LINKSinline_linksPROTECT_LINKSprotect_linksGOOGLE_LIST_INDENTgoogle_list_indentIGNORE_ANCHORSignore_linksIGNORE_MAILTO_LINKSignore_mailto_linksIGNORE_IMAGESignore_imagesIMAGES_AS_HTMLimages_as_htmlIMAGES_TO_ALTimages_to_altIMAGES_WITH_SIZEimages_with_sizeIGNORE_EMPHASISignore_emphasisBYPASS_TABLESbypass_tablesIGNORE_TABLESignore_tables
google_docul_item_markemphasis_markstrong_markSINGLE_LINE_BREAKsingle_line_breakUSE_AUTOMATIC_LINKSuse_automatic_linkshide_strikethrough	MARK_CODE	mark_codeWRAP_LIST_ITEMSwrap_list_items
WRAP_LINKS
wrap_linksWRAP_TABLESwrap_tables
PAD_TABLES
pad_tablesDEFAULT_IMAGE_ALTdefault_image_alttag_callback
OPEN_QUOTE
open_quoteCLOSE_QUOTEclose_quoteINCLUDE_SUP_SUBinclude_sup_subouttextfr    outtextlistquietp_poutcountstartspaceaastackmaybe_automatic_link
empty_linkrecompileabsolute_url_matcheracountlist
blockquoteprestartprecodequote	br_toggle	lastWasNLlastWasListstyle	style_def	tag_stackemphasisdrop_white_spaceinheader
abbr_title	abbr_data	abbr_listr!   stressedpreceding_stressedpreceding_datacurrent_tag	UNIFIABLE)selfr    r!   r"   	__class__s       P/var/www/openai/venv/lib/python3.12/site-packages/crawl4ai/html2text/__init__.pyr+   zHTML2Text.__init__&   sA    	%0 # "//!-- & 7 7 ++!--!--$*$?$?!##)#=#= "//#11"(";";"11#)#=#= #11$33#11 & 7 7%55#11#11 !'!9!9#)#=#= "'))%55 ++!-- ++!'!9!9  ++!--%55;}}DHDH ')


&(@B37!$&JJ/@$A!')		
 
46UW !)-(,)+"' #8     c                 N    |j                         D ]  \  }}t        | ||        y )N)itemssetattrr   kwargskeyvalues       r   update_paramszHTML2Text.update_params   s"     ,,.JCD#u% )r   datac                 H    |j                  dd      }t        | 	  |       y )Nz</' + 'script>z	</ignore>)replacer*   feed)r   r   r   s     r   r   zHTML2Text.feed   s     ||,k:Tr   c                     d| _         | j                  |       | j                  d       | j                  | j                               }| j                  rt        |      S |S )NTr   )ry   r   optwrapfinishrj   r   )r   r   markdowns      r   handlezHTML2Text.handle   sJ    
		$		"<<.??%h//Or   sc                 Z    | j                   j                  |       |r|d   dk(  | _        y y )N
)ru   appendr   )r   r   s     r   rt   zHTML2Text.outtextf   s-    "rUd]DN r   c                 ,   | j                          | j                          | j                  dd       dj                  | j                        }| j
                  rt        j                  j                  d   }nd}|j                  d|      }g | _        |S )Nr   endforceznbsp; r(   )
closepbrojoinru   r0   htmlentitieshtml5r   )r   outtextr)   s      r   r   zHTML2Text.finish   sz    


r''$**+==&&w/DD//"7> r   cc                 F    | j                  | j                  |      d       y NT)handle_datacharref)r   r   s     r   handle_charrefzHTML2Text.handle_charref   s    a$/r   c                 P    | j                  |      }|r| j                  |d       y y r   )	entityrefr   )r   r   refs      r   handle_entityrefzHTML2Text.handle_entityref   s*    nnQ S$' r   tagattrsc                 >    | j                  |t        |      d       y )NTry   )
handle_tagdict)r   r   r   s      r   handle_starttagzHTML2Text.handle_starttag   s    T%[5r   c                 ,    | j                  |i d       y )NFr   )r   )r   r   s     r   handle_endtagzHTML2Text.handle_endtag   s    Ru-r   c                    d|vryd}t        | j                        D ]l  \  }}d|j                  v rS|j                  d   |d   k(  r>d|j                  v sd|v r*d|j                  v rd|v r|j                  d   |d   k(  rd}nd}|sj|c S  y)z
        :type attrs: dict

        :returns: The index of certain set of attributes (of a link) in the
        self.a list. If the set of attributes is not found, returns None
        :rtype: int
        hrefNFtitleT)	enumerater{   r   )r   r   matchir{   s        r   previousIndexzHTML2Text.previousIndex   s     dff%DAq QWWV_f%Eagg%E)9177*#u,GGG,g> $ E & r   ry   	tag_styleparent_stylec                    t        |      }t        |      }d|v xr | j                  }d}t        j                  D ]  }||v xr ||v}|s n d|v xr d|v}	t	        |      xr t	        |       xr | j
                   }
|r|s|	s|
r| xj                  dz  c_        |r| xj                  dz  c_        |	r0| j                  | j                         | xj                  dz  c_
        |r0| j                  | j                         | xj                  dz  c_
        |
r.| j                  d       | xj                  dz  c_
        d| _        yy|s|	s|
r| xj                  dz  c_        d| _        |
r:| j                  r| xj                  dz  c_
        n| j                  d       d| _        |r=| j                  r| xj                  dz  c_
        n| j                  | j                         |	r=| j                  r| xj                  dz  c_
        n| j                  | j                         |s|	r| j                  s| j                  d       |r| xj                  dz  c_        yy)	z/
        Handles various text emphases
        zline-throughFitalicr	   `Tr   N)r   r`   r
   BOLD_TEXT_STYLE_VALUESr   r   r   rv   r   rZ   r   r[   r   rz   )r   ry   r   r   tag_emphasisparent_emphasisstrikethroughboldbold_markerr   fixeds              r   handle_emphasiszHTML2Text.handle_emphasis   s    ,I6.|< ',6R4;R;R !88K,.U;o3UD 9
 \)Mho.M#I. +L99HH 	  v"

a
t))*%%*%t''(%%*%s%%*% 	 
 v""
(())Q.)FF3K!	(())Q.)FF4++,(())Q.)FF4--.s

a
 r   c           	      !   || _         | j                  | j                  | |||      du ry |r@| j                  4|dvr0|dk7  s| j                  r| j	                  d       d | _        d| _        | j                  ri }|rT| j                  r| j                  d   d   }t        || j                  |      }| j                  j                  |||f       nM| j                  r| j                  j                         nd i i f\  }}}| j                  r| j                  d   d   }t        |      r| j                  r|rxd| _        | j                  r| j                  d   dk(  r| j                  j                          d| _        | j	                  t        |      dz  d	z          | j	                  d       nQd
| _        d| _        y | j%                          |r(d| _        | j	                  t        |      dz  d	z          nd| _        y |dv re| j                  r/|rt'              r| j%                          n;| j)                          n*| j                  rn| j*                  rn| j%                          |dk(  r4|r2| j,                  d
kD  r| j	                  d       n| j	                  d       |dk(  r3|r1| j%                          | j	                  d       | j%                          |dv r-|r| xj.                  dz  c_        n| xj.                  dz  c_        |dk(  r-|r| xj0                  dz  c_        n| xj0                  dz  c_        |dv rd
| _        |dk(  rg|r@| j%                          | j	                  dd       d| _        | xj,                  dz  c_        n%| xj,                  dz  c_        | j%                          |dv r| j4                  s|ro| j6                  rc| j6                  d   t8        j:                  vrD| j6                  d   t8        j<                  vr%d	| j>                  z   }| xj6                  d	z  c_        n| j>                  }| j	                  |       |rd| _         |dv r| j4                  s|rh| j6                  r\tC        | jD                        d
kD  rD| j6                  d   | jD                  d
   k(  r%d	| jD                  z   }| xj6                  d	z  c_        n| jD                  }| j	                  |       |rd| _         |dv rT|r6| j6                  r*| j6                  d   dk(  rd}	| xj6                  d	z  c_        nd}	| j	                  |	       |rd| _         | j                  r| j                  s| jG                  |       |dv r/| jH                  s#| j	                  d       | jJ                   | _%        |d k(  rj|rd | _&        d!| _'        d"|v rV|d"   | _&        nK| jL                  8| jN                  J | jL                  | jP                  | jN                  <   d | _&        d | _'        |d#k(  rU| jR                  s| j	                  | jT                         n| j	                  | jV                         | jR                   | _)        dWd$tX        d%tZ        d"tZ        d&d fd'}
|d(k(  r| j\                  s|rd)|v r|d)   | j^                  r|d)   ja                  d      si| jb                  r|d)   ja                  d*      sI| j                  j                  |       |d)   | _        d| _        | jd                  rd+|d)   z   d,z   |d)<   ny| j                  j                  d        n\| j                  rO| j                  j                         }| j                  r| j
                  s	d | _        n|r|d)   J | j
                  r| j	                  d       d| _        d | _        | jf                  r5d
| _        |ji                  d"      xs d!}tk        |      } |
| |d)   |       n| jm                  |      }|| jn                  |   }nQ| xjp                  dz  c_8        ts        || jp                  | jt                        }| jn                  j                  |       | j	                  d-t[        |jv                        z   d.z          |dk(  r|r| j                  sd/|v r|d/   | jx                  s|d/   |d)<   |ji                  d0      xs | jz                  }| j|                  s| j~                  rd1|v sd2|v r| j	                  d3|d/   z   d4z          d1|v r|d1   | j	                  d5|d1   z   d4z          d2|v r|d2   | j	                  d6|d2   z   d4z          |r| j	                  d7|z   d4z          | j	                  d8       y | j                  | j                  }| jx                  rQtk        |      |k(  rC| j                  j                  |      r(| j	                  d+tk        |      z   d,z          d| _        y | j	                  d       d | _        d| _        | jx                  r| j	                  tk        |             n| j	                  d9tk        |      z   d.z          | jf                  rT|ji                  d)      xs d!}| j	                  d:tk        t        j                  | j                  |            z   d;z          n| jm                  |      }|| jn                  |   }nQ| xjp                  dz  c_8        ts        || jp                  | jt                        }| jn                  j                  |       | j	                  dt[        |jv                        z   d.z          |d<k(  r|r| j%                          |d=k(  r|s| j                          |d>k(  r|r| j	                  d?       |d>k(  r|s| j                          |d@v r| j                  s| j                  s| j%                          |rK| j                  rt              }n|}t        |      }| j                  j                  t        ||             nO| j                  rC| j                  j                          | j                  s| j                  s| j	                  dA       d| _G        nd| _G        |dBk(  r1| j                          |r| j                  r| j                  d   }nt        dCd
      }| j                  r$| j	                  dD| j                        z         nFd }| j                  D ]5  }| j	                  |dEk(  r|j                  dCk(  rdFndD       |j                  }7 |j                  dCk(  r| j	                  | j                  d	z          nK|j                  dEk(  r<|xj                  dz  c_N        | j	                  t[        |j                        dGz          d| _        |dHv rC| j                  r|dIk(  r|rn.| j)                          nn| j                  r|r| j)                          |dJv rF|r"| j	                  dKj                  |             n| j	                  dLj                  |             n|r"| j	                  dMj                  |             n| j	                  dNj                  |             nl|dOk(  r|rJd| _R        | j                  r| j	                  d+t        j                  z   d,z          | j	                  d       nR| j                  rF| j)                          | j	                  dPt        j                  z   d,z          | j	                  d       |dJv r&|r$| j*                  r| j	                  dQ       d| _        |dIk(  r	|rd
| _V        |dIk(  r|sd| _        | j)                          |dIk(  rS|sQ| j                  rE| j	                  dRj                  dSg| j                  z               | j)                          d| _R        |dJv r|r| xj                  dz  c_V        |dTk(  rE|rd| _X        d| _$        n$d| _$        | j                  r| j                  dU       | j%                          |dVv rQ| j                  rD|r!| j	                  dMj                  |             y | j	                  dNj                  |             y y y )XNT)pdivr   dldtimg[Fr   r   #r   r   )r   r   brz  
>   
hrz* * *)headr   scriptr	   r   )bodyr   > r   )emr   u)strongb)delstriker   ~z ~~z~~)kbdr   ttr   abbrr   r   qr   linkr#   c                     t        j                  | j                  |      }|j                         rdj	                  |      nd}| j                  dj	                  t        |      |             y )Nz "{}"r   z]({url}{title}))urlr   )urlparseurljoinr!   stripformatr   r   )r   r  r   r  s       r   link_urlz&HTML2Text.handle_tag.<locals>.link_url  sQ    ""4<<6C-2[[]GNN5)EFF$++	#e+LMr   r{   r   zmailto:<>z][]srcaltwidthheightz
<img src='z' zwidth='zheight='zalt='z/>z![()r   r   dd    )olulr   lir    r  z   z. )tabletrtdthr  )r  r  z<{}>

z
</{}>z<{}>z</{}>r  </z| |z---r   z
[/code])supsub)r   )\r   rm   r}   rK   r   r~   rX   r   r   r   r   popr   r|   r   ru   rz   rw   r   r   soft_brr,   r   rv   r   ry   rS   r   string
whitespacepunctuationrZ   r   lenr[   r   r   r   r   r   r   r   ro   rq   r   strrG   r?   
startswithrI   rC   rA   getr   r   r{   r   r   rx   countrO   rl   rM   rQ   r   r   r  r  r!   r   r   r   r   r   r   google_nest_countnamerY   numrW   rU   r  r.   rj   r
   TABLE_MARKER_FOR_PADr-   r   r   rb   r    rs   )r   r   r   ry   r   r   dummyr   r   r   r	  r{   r   r   a_propsr  r   
list_stylenumbering_startr  parent_listr   s                         r   r   zHTML2Text.handle_tag6  s    (  sE59TA
 ))5<<!3!3FF3K(,D%#DO??
 ,.L>>#'>>"#5a#8L)%N	%%sE9&=> -1NNDNN&&(r2 (ui >>#'>>"#5a#8Lc7{{$(DM''D,<,<R,@C,G((,,.%*
r#w}s23s DH$)DM$(DMFF2c7S=3./$)DM,.y9FFHLLN##$;5"x v$;5FFHFF7OFFH--

a


a
'>

a


a
(?DJ,t4(!
1$1$""4+?+? ''''+63D3DD''+63E3EE!3!33##s*#--FF8 $/!$*>*> '' (()A-''+t/?/?/BBt///##s*#))FF6N $((,,1D1DR1HC1O##s*#FF6N $??==$$UI|D''FF3K IIDI&="&!#e#&+GnDO??.>>55559__DNN4>>2&*DO!%#:::t't''(!ZZDJ	N9 	NC 	N 	NT 	N
 #:d//eOf1!55%-:R:RSV:W00U6]5M5Mi5X KK&&u-05fD-&*DO))(+eFm(;c(AfKK&&t,;;)A00481 y444?? FF3K.3DO8<D5,,'(DH$%EE'N$8bE$-e$4E$T1V9e< $ 2 21 5A }*.&&) $q 0*74;;*V $g 6 FF4#gmm*<#<s#BC%<E$*<*<~%,":))$)%LE&Mii&@$*@*@ &&))w%/?8uCTFF<%,6=>%'E'N,Fy5>9D@A5(U8_-HzE(O;dBCw}t34FF4L ,,844D**%cNd2 55;;DAsYs^3c9:*/s481*/ %%FF9S>*FF4)C.0367(($yy06B)H,<,<T\\4,P"QQTWW !..u5=&*ffQiG KK1,K&3E4;;&VG FFMM'2sS%77#=>$;5FFH$;uHHJ$;5FF6N$;uHHJ,99T%5%5??!29!=J!$J"6u"=		  Z!IJ99IIMMO??499t#D$D$;HHJ992B$T1-B??FF4$"8"8"CCD #'K $		%0D%8TYY$=NETX '+ii	 !* 77d?FF4,,s23WW_FFaKFFF3rvv;-.!
--!!$;##LLN,&z0056y//45v}}S12w~~c23 '>+/(?? FF3)D)D#Ds#JK FF6N?? LLN FF4&*E*E#E#KL FF6N,&5))t)-D&$;5$%DM$;u).D&LLN$;u1A1AFF388UGdmm$;<=LLN',D$,&5MMQ&M%< $ >>HH[)FFH. T%9%9v}}S)*w~~c*+	 &: r   c                 2    | j                   dk(  rd| _         yy)zPretty print has a line breakr   r	   N)rw   r   s    r   r   zHTML2Text.pbr  s    88q=DH r   c                 :    | j                   rd| _        yd| _        y)z Set pretty print to 1 or 2 linesr	   r   N)r]   rw   r5  s    r   r   zHTML2Text.p  s    ..1Ar   c                 2    | j                          d| _        y)zSoft breaksr  N)r   r   r5  s    r   r"  zHTML2Text.soft_br  s    
r   puredatar   c           	         | j                   | xj                   |z  c_         | j                  s| j                  rB|j                         }| j                  r| j
                  s| j                  s|}|dk7  rd| _        |r9| j
                  s-t        j                  dd|      }|r|d   dk(  rd| _	        |dd }|s|sy| j                  rK|j                  d      s|j                  d	      sd|z   }| j                  r| j                  d
       d| _        d| j                  z  }|r
|r|d   dk(  s| j                  r|dz  }| j
                  rA| j                   s|dz  }|dt#        | j                         z  z  }|j%                  dd|z         }| j                  r$d| _
        | j                   r|j                  d      }| j&                  rd| _	        d| _        d| _        |dk(  rd| _        | j                  d       d| _	        | j                  r<| j                  | j(                  dz   |z   | j                  z         d| _	        d| _        | j                  r$| j*                  s| j                  d       d| _	        | j,                  rF| j                  dk(  r| j.                  s|dk(  r%|dk(  r| j                  d       g }| j,                  D ]  }| j0                  |j0                  kD  r| j                  dt3        |j4                        z   dz   t7        j8                  | j:                  |j<                  d         z          d|j<                  v r3|j<                  d   $| j                  d|j<                  d   z   dz          | j                  d       |j?                  |        | j,                  |k7  r| j                  d       || _        | j@                  rD|dk(  r?| j@                  jC                         D ]"  \  }}	| j                  d|z   dz   |	z   dz          $ d| _        | j                  |       | xj0                  dz  c_        yy)z6
        Deal with indentation and whitespace
        Nr   r   z\s+r   Tr	   r   z
z
[code]r  r  Fr   r   z   [z]: r   r   z (r  z  *[)"r   rv   rX   lstripr   r   r   r   r   rz   r   r(  rb   r    rw   r   r   r&  r   ry   r   r   r{   r<   rx   r'  r*  r  r  r!   r   r   r   r   )
r   r   r8  r   lstripped_databqnewar  r   
definitions
             r   r   zHTML2Text.o  s    >>%NNd"Nzz "&(($((dii)D!R',-D) vvfdD1DGsN!%DJ8D}}t,T__V5L$;D>>HHZ( DHt&BdtAw#~4??c	xxyy&LBfs499~--||D$)4}} %99;;t,Dzz"
"
~"
xx$..4/"4@A"
!#zz~~HHSM"
vvQ4#<#<%E>HHTN FFD}}t}}4"!$**o.#$ '..t||TZZ=OPQ #djj0TZZ5H5T HHTDJJw,?%?#%EFD) # 66T>HHTN~~%5.(,(<(<(>$D*HHVd]U2Z?$FG )? DHHHTNMMQMK r   entity_charc                 D   |sy | j                   r|j                         }d| _         d| _        nT| j                  rHt        j                  d|d         r(t        | j                        s| j                  dvrd|z   }d| _        | j                  r$| j                  j                  t        |             | j                  v| j                  }||k(  rF| j                  j	                  |      r+| j                  r| j                  d|z   dz          d| _        y | j                  d	       d | _        d| _        | j                   sF| j"                  s:|s8t%        || j&                  | j(                  | j*                  | j,                  
      }|| _        | j                  |d       y )NFTz[^][(){}\s.!?]r   )r{   r   r   r   r
  r  r   )snobr6   r8   r:   )r8  )r   r  r   r   r   r   r   r   r   updater   r}   r   r_   r   r~   r   r   r   r2   r6   r8   r:   r   )r   r   r?  r   s       r   r   zHTML2Text.handle_datac  si    ==::<D!DM&*D#$$*DG44++,$$,@@ Tz&+D#::NN!!/$"78$$0,,D--33D9,,sTzC'("'s,0)"'yy+$T0@0@T__jnjzjz  IM  IY  IY  ZD"td#r   r,  c                     |d   dv rt        |dd  d      }nt        |      }| j                  s|t        v r	t        |   S 	 t        |      S # t        $ r Y yw xY w)Nr   )xXr	      r   )intr0   r   chr
ValueError)r   r,  r   s      r   r   zHTML2Text.charref  sc    7j DHb!AD	A  Q+%5q>!1v s   
A 	AAc                    | j                   s%|t        j                  v rt        j                  |   S 	 t        j                  j
                  |dz      }|dk(  rt        j                  |   S |S # t        $ r d|z   dz   cY S w xY w)N;&r)   )r0   r
   r   r   r   r   KeyError)r   r   chs      r   r   zHTML2Text.entityref  s      Q&*:*:%:##A&&	!$$QW-B '(6kv"9r9  	!7S= 	!s    A- -B Br   c                 N    d}d|v rt        |d   dd       | j                  z  }|S )zq
        Calculate the nesting count of google doc lists

        :type style: dict

        :rtype: int
        r   zmargin-leftN)rG  rE   )r   r   
nest_counts      r   r+  zHTML2Text.google_nest_count  s8     
E!U=1#2674;R;RRJr   textc                    | j                   s|S d}d}| j                  sd| _        |j                  d      D ]  }t	        |      dkD  rt        || j                  | j                  | j                        sd}|j                  d| j                  z         rd}n|j                  d      rd}t        || j                   d|      }|dj                  |      z  }|j                  d      r|d	z  }d
}|r|dz  }d
}|dz  }d}t        j                  j                  |      r||dz   z  }d
}|dk  s|dz  }|d
z  }	 |S )zi
        Wrap all paragraphs in the provided text.

        :type text: str

        :rtype: str
        r   r   Fr   r  r  r   )break_long_wordssubsequent_indentr   r	   z

r   )r=   rf   rA   splitr&  r   rd   rh   r(  rY   r   r   endswithr
   RE_SPACEr   )r   rR  resultnewlinesparaindentwrappeds          r   r   zHTML2Text.optwrap  sU    K  %DJJt$D4y1}$//4+?+?AQAQ  Ftd.?.?'?@ "(. "&").*0	G dii00F}}T*&(#$$#$&(#$ "??006$+-#$a<dNFMHS %T r   )r#   N)FFF)%__name__
__module____qualname__r
   
BODY_WIDTHr   r   r'  rG  r+   r   r   r   rt   r   r   r   r   r   r   r   r   r   boolr   r   r   r   r"  r   r   r   r   r   r+  r   __classcell__r   s   @r   r   r   %   sM    &***	i9k"i9 i9 	i9
 
i9V&  3 3 +# +$ +
 (0 0 0
(# 
($ 
(63 6tE#x}:L4M/N 6SW 6. . .4Xc](:#;  8G G &*38nG DHcNG 	G Rl,l,#C#$67l,@Dl,	l,^
6 LQnn#'n8=dCi8Hn	n`)$ )$$ )$4 )$VC C :3 :3 :tCH~ # <C <C <r   r   r   r!   r"   r#   c                 b    |t         j                  }t        ||      }|j                  |       S )N)r!   r"   )r
   rb  r   r   )r   r!   r"   hs       r   	html2textrh    s-    %%	'Y7A88D>r   c                   @     e Zd Zdd fd
Zd Z fdZd fd	Z xZS )CustomHTML2TextF)handle_code_in_prec                >   t        |   |i | d| _        d| _        t	               | _        d | _        g | _        d| _        || _	        d| _
        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        y )NFr   T)r*   r+   
inside_preinside_codesetpreserve_tagscurrent_preserved_tagpreserved_contentpreserve_depthrk  r?   r]   rb   rs   r=   rI   rG   r4   r6   r8   r:   r2   )r   rk  argsr   r   s       r   r+   zCustomHTML2Text.__init__  s    $)&)  U%)"!#"4 $) !&$#' ! %   r   c                     |j                         D ]5  \  }}|dk(  rt        |      | _        |dk(  r|| _        )t	        | ||       7 y)z)Update parameters and set preserved tags.rp  rk  N)r   ro  rp  rk  r   r   s       r   r   zCustomHTML2Text.update_params  sG     ,,.JCo%%(Z",,*/'c5) )r   c                 >   || j                   v r|rz| j                  dk(  rU|| _        g | _        dj	                  d |j                         D              }| j                  j                  d| | d       | xj                  dz  c_        y | xj                  dz  c_        | j                  dk(  rX| j                  j                  d| d       dj	                  | j                        }| j                  d|z   dz          d | _        y | j                  dkD  rj|rHdj	                  d	 |j                         D              }| j                  j                  d| | d       y | j                  j                  d| d       y |d
k(  r4|r| j                  d       d| _        y | j                  d       d| _        y |dk(  rM| j                  r| j                  sy |r| j                  d       d| _
        y | j                  d       d| _
        y t        | 1  |||       y )Nr   r   c              3   :   K   | ]  \  }}|	d| d| d  y wNr   z="" .0kvs      r   	<genexpr>z-CustomHTML2Text.handle_tag.<locals>.<genexpr>#  s)     &`ARSR_1#Rs!}   
r
  r  r	   r  r   c              3   :   K   | ]  \  }}|	d| d| d  y wrx  rz  r{  s      r   r  z-CustomHTML2Text.handle_tag.<locals>.<genexpr>5  s'     "\]TQamQqcA3a=]r  r   z```
Tz
```
Fr   r   )rp  rs  rq  rr  r   r   r   r   rm  rk  rn  r*   r   )r   r   r   ry   attr_strpreserved_htmlr   s         r   r   zCustomHTML2Text.handle_tag  s   $$$$&&!+14D.-/D*!ww&`&``H**11AcU8*A2FG##q(###q(#&&!+**11Bse1+>%'WWT-C-C%DNFF4.047815D. "77"\U[[]"\\&&--#xj.BC  &&--3%qk: %<w"&y!"'F]t'>'>s#' s#( GsE51r   c                    | j                   dkD  r| j                  j                  |       y| j                  r| j	                  |       y| j
                  r"| j	                  |j                  dd             yt        | !  ||       y)z>Override handle_data to capture content within preserved tags.r   Nr   r   )	rs  rr  r   rm  r   rn  r   r*   r   )r   r   r?  r   s      r   r   zCustomHTML2Text.handle_dataP  sq    """))$/??FF4LFF4<<c*+ 	D+.r   r^  )r_  r`  ra  r+   r   r   r   rd  re  s   @r   rj  rj    s     16 !0*32j/ /r   rj  )r   N).__doc__html.entitiesr   html.parserr   r#  urllib.parseparser  textwrapr   typingr   r   r   r   r   r   r
   _typingr   elementsr   r   utilsr   r   r   r   r   r   r   r   r   r   r   r   r   __version__parser
HTMLParserr   r'  rG  rh  rj  rz  r   r   <module>r     s    D   	    5 5    0     J&& JXC # x} PS h/i h/r   