
    g~!                        d dl Zd dlmZmZmZ ddlmZ ej                  j                         D  ci c]$  \  } }| dk7  rej                  j                  |    |& c}} ZdedefdZd	edeeef   fd
Zdedeeeeef   f   fdZdeeee   f   deeeeef   f   deeef   deeef   fdZd	eeef   defdZd	eeef   defdZd	eeef   dee   fdZd	eeef   defdZdeeee   f   defdZdededededef
dZdedefdZ	 	 	 	 	 d'dedededed ed!edefd"Zd#ee   d$edee   fd%Zd(ded$edefd&Zyc c}} w ))    N)DictListOptional   )confignbsptagreturnc                 j    | d   dk(  r+t        |       dk(  r| d   }d|cxk  rdk  rt        |      S  yy)Nr   h   r   09)lenint)r	   ns     M/var/www/openai/venv/lib/python3.12/site-packages/crawl4ai/html2text/utils.pyhnr      s<    
1v}SQF>c>q6M     stylec                    | j                  d      D cg c]  }d|v s|j                  dd       c}D ci c]@  \  }}|j                         j                         |j                         j                         B c}}S c c}w c c}}w )z,
    :returns: A hash of css attributes
    ;:r   )splitstriplower)r   zxys       r   dumb_property_dictr       sy     /4kk#.>K.>#(QWWS!_.>KKDAq 	
	1779??,,K Ks   	BBABdatac                    | dz  } | j                  d      }|dk7  r5| d| | | j                  d|      dz   d z   } | j                  d      }|dk7  r5| j                  d      D cg c]&  }d|j                         v s|j                  d      ( }}	 |D ci c]  \  }}|j                         t        |      ! }}}|S c c}w c c}}w # t        $ r i }Y |S w xY w)	z
    :type data: str

    :returns: A hash of css selectors, each of which contains a hash of
    css attributes.
    :rtype: dict
    r   z@importr   r   N}{)findr   r   r    
ValueError)r!   importIndexr   pairsabelementss          r   dumb_css_parserr-      s     	CKD))I&K

Ak"T$))C*E*I*K%LLii	* 
 $(::c?G?acQWWY6FQWWS\?EGAFGAAGGI1!44G O HG Os0   $C <C C $C;C C CCattrs	style_defparent_stylec                    |j                         }d| v rE| d   J | d   j                         D ](  }|j                  d|z   i       }|j                  |       * d| v r&| d   J t	        | d         }|j                  |       |S )z
    :type attrs: dict
    :type style_def: dict
    :type style_def: dict

    :returns: A hash of the 'final' style attributes of the element
    :rtype: dict
    class.r   )copyr   getupdater    )r.   r/   r0   r   	css_class	css_styleimmediate_styles          r   element_styler:   9   s     E%W~)))w--/I!cIor:ILL# 0 %W~))),U7^<_%Lr   c                      d| v r
| d   }|dv ryy)zh
    Finds out whether this is an ordered or unordered list

    :type style: dict

    :rtype: str
    zlist-style-type)disccirclesquarenoneulol )r   
list_styles     r   google_list_stylerD   T   s&     E!,-
==r   c                 
    d| v S )z
    Check if the style of the element has the 'height' attribute
    explicitly defined

    :type style: dict

    :rtype: bool
    heightrB   )r   s    r   google_has_heightrG   d   s     ur   c                     g }d| v r|j                  | d          d| v r|j                  | d          d| v r|j                  | d          |S )zk
    :type style: dict

    :returns: A list of all emphasis modifiers of the element
    :rtype: list
    ztext-decorationz
font-stylezfont-weight)append)r   emphasiss     r   google_text_emphasisrK   p   sZ     HE!/01ul+,m,-Or   c                 0    d}d| v r| d   }d|k(  xs d|k(  S )zu
    Check if the css of the current element defines a fixed width font

    :type style: dict

    :rtype: bool
     zfont-familyzcourier newconsolasrB   )r   font_familys     r   google_fixed_width_fontrP      s0     KM*K'D:+DDr   c                 \    d| v r| d   J 	 t        | d         dz
  S y# t        $ r Y yw xY w)zh
    Extract numbering from list element attributes

    :type attrs: dict

    :rtype: int or None
    startr   r   )r   r'   )r.   s    r   list_numbering_startrS      sQ     %W~)))	uW~&**   		s    	++para
wrap_linkswrap_list_itemswrap_tablesc                    |s t         j                  j                  |       ry| dd dk(  s| d   dk(  ry| j                         }|dd dk(  rt	        |      dkD  r	|d   dk7  ry	|dd
 dv r|dd dk(  s| S |s t         j
                  j                  |       ryt        t         j                  j                  |      xs t         j                  j                  |            S )NTr      z    	r   z---Fr   )r[   *z**)
r   RE_LINKsearchlstripr   RE_TABLEboolRE_ORDERED_LIST_MATCHERmatchRE_UNORDERED_LIST_MATCHER)rT   rU   rV   rW   strippeds        r   skipwraprf      s    
 &..//5 AayFd1go {{}H!}X!2x{c7I
 !}
"8Aa=D+@""" 6??11$7
 &&,,X6 	<++11(; r   textc                 B    t         j                  j                  d|       S )zU
    Escapes markdown-sensitive characters within other markdown
    constructs.
    \\\1)r   RE_MD_CHARS_MATCHERsub)rg   s    r   	escape_mdrl      s    
 %%))'488r   escape_backslashsnob
escape_dotescape_plusescape_dashc                 Z   |r t         j                  j                  d|       } |r t         j                  j                  d|       } |r t         j                  j                  d|       } |r t         j
                  j                  d|       } |r t         j                  j                  d|       } | S )z
    Escapes markdown-sensitive characters across whole document sections.
    Each escaping operation can be controlled individually.
    ri   z\1\\\2)r   RE_MD_BACKSLASH_MATCHERrk   RE_MD_CHARS_MATCHER_ALLRE_MD_DOT_MATCHERRE_MD_PLUS_MATCHERRE_MD_DASH_MATCHER)rg   rm   rn   ro   rp   rq   s         r   escape_md_sectionrx      s     --11'4@--11'4@''++It<((,,Y=((,,Y=Kr   linesright_marginc                    | d   j                  d      D cg c]  }t        |j                               |z     }}t        |      }| D ]  }|j                  d      D cg c]  }|j                          }}t        |      }||k  r|dg||z
  z  z  }n-||k  r(||||z
   d D cg c]  }t        |      |z    c}z  }|}t        ||      D cg c]  \  }}t	        t        |      |z   |       }}} g }	| D ]=  }|j                  d      D cg c]  }|j                          }}t        |j                               t        d      k(  rud}
t        ||      D cg c]5  \  }}|j                         |
|t        |j                               z
  z  z   7 }}}|	j                  ddj                  |      z   dz          d}
t        ||      D cg c]5  \  }}|j                         |
|t        |j                               z
  z  z   7 }}}|	j                  d	dj                  |      z   dz          @ |	S c c}w c c}w c c}w c c}}w c c}w c c}}w c c}}w )
zR
    Given the lines of a table
    padds the cells and returns the new lines
    r   |rM   Nz-|r[   z|- z| )	r   r   rstripzipmaxsetr   rI   join)ry   rz   r   	max_widthmax_colslinecolsnum_colsold_len	new_linesfillerMnew_colss                r   reformat_tabler      sq    :?q9LM9LAQXXZ</9LIM9~H$(JJsO4Oq
O4t9 hRDHx/00D 8@S>T>V9WX9WA#a&</9WXXIH CFdIBV
BVJAwCA%w/BV 	 
   I$(JJsO4Oq
O4tzz|D	)F  i00DAq 
fC
O(;<=0   TCHHX$66<=F  i00DAq 
fC
O(;<=0   TCHHX$66<=   G N 5 Y
 5s)   #H?I&I	"II":I:Ic                 :   | j                  d      }g }d}g }|D ]o  }t        j                  |v r6| }|s0t        ||      }|j	                  |       g }|j                  d       K|r|j                  |       _|j                  |       q dj                  |      S )z0
    Provide padding for tables in the text
    
FrM   )r   r   TABLE_MARKER_FOR_PADr   extendrI   r   )rg   rz   ry   table_buffertable_startedr   r   tables           r   pad_tables_in_textr     s     JJtELMI&&$. --M &|\B  '!  $%T"  99Yr   )TFTTT)r   )html.entitieshtmltypingr   r   r   rM   r   	UNIFIABLEitemsentitiesname2codepointunifiable_nstrr   r   r    r-   r:   rD   ra   rG   rK   rP   rS   rf   rl   rx   r   r   )kvs   00r   <module>r      sz    ' '    &&((1F{ 	MM  #Q&(C C c d38n # $sDcN':"; 4Xc]"#Cc3h'( sCx. 
#s(^	6T#s(^   	T#s(^ 	 	S#X 49 $E4S> Ed ES(3-%7 8 S $"
""26"EI"	"J9C 9C 9 "
  	
   	:)$s) )3 )49 )X S    C  es   )D?