
    @gkA                     j    d Z ddlZddlmZmZmZmZ ddlmZ ddl	m
Z
mZmZ dZd
dZ G d d	e      Zy)zLConvert HTML with simple text formatting to text with ANSI escape sequences.    N)
HTMLParserStringIOname2codepointunichr)compact_empty_lines)ANSI_COLOR_CODES
ANSI_RESET
ansi_style)HTMLConverterhtml_to_ansic                 *    t        |      } ||       S )a  
    Convert HTML with simple text formatting to text with ANSI escape sequences.

    :param data: The HTML to convert (a string).
    :param callback: Optional callback to pass to :class:`HTMLConverter`.
    :returns: Text with ANSI escape sequences (a string).

    Please refer to the documentation of the :class:`HTMLConverter` class for
    details about the conversion process (like which tags are supported) and an
    example with a screenshot.
    )callback)r   )datar   	converters      P/var/www/openai/venv/lib/python3.12/site-packages/humanfriendly/terminal/html.pyr   r      s     x0IT?    c                       e Zd ZdZdZ	 d Zd Zed        Zd Z	ddZ
d	 Zd
 Zd Zd Zd Zd Zd Zd Zd Zd Zd Zy)r   a  
    Convert HTML with simple text formatting to text with ANSI escape sequences.

    The following text styles are supported:

    - Bold: ``<b>``, ``<strong>`` and ``<span style="font-weight: bold;">``
    - Italic: ``<i>``, ``<em>`` and ``<span style="font-style: italic;">``
    - Strike-through: ``<del>``, ``<s>`` and ``<span style="text-decoration: line-through;">``
    - Underline: ``<ins>``, ``<u>`` and ``<span style="text-decoration: underline">``

    Colors can be specified as follows:

    - Foreground color: ``<span style="color: #RRGGBB;">``
    - Background color: ``<span style="background-color: #RRGGBB;">``

    Here's a small demonstration:

    .. code-block:: python

       from humanfriendly.text import dedent
       from humanfriendly.terminal import html_to_ansi

       print(html_to_ansi(dedent('''
         <b>Hello world!</b>
         <i>Is this thing on?</i>
         I guess I can <u>underline</u> or <s>strike-through</s> text?
         And what about <span style="color: red">color</span>?
       ''')))

       rainbow_colors = [
           '#FF0000', '#E2571E', '#FF7F00', '#FFFF00', '#00FF00',
           '#96BF33', '#0000FF', '#4B0082', '#8B00FF', '#FFFFFF',
       ]
       html_rainbow = "".join('<span style="color: %s">o</span>' % c for c in rainbow_colors)
       print(html_to_ansi("Let's try a rainbow: %s" % html_rainbow))

    Here's what the results look like:

      .. image:: images/html-to-ansi.png

    Some more details:

    - Nested tags are supported, within reasonable limits.

    - Text in ``<code>`` and ``<pre>`` tags will be highlighted in a
      different color from the main text (currently this is yellow).

    - ``<a href="URL">TEXT</a>`` is converted to the format "TEXT (URL)" where
      the uppercase symbols are highlighted in light blue with an underline.

    - ``<div>``, ``<p>`` and ``<pre>`` tags are considered block level tags
      and are wrapped in vertical whitespace to prevent their content from
      "running into" surrounding text. This may cause runs of multiple empty
      lines to be emitted. As a *workaround* the :func:`__call__()` method
      will automatically call :func:`.compact_empty_lines()` on the generated
      output before returning it to the caller. Of course this won't work
      when `output` is set to something like :data:`sys.stdout`.

    - ``<br>`` is converted to a single plain text line break.

    Implementation notes:

    - A list of dictionaries with style information is used as a stack where
      new styling can be pushed and a pop will restore the previous styling.
      When new styling is pushed, it is merged with (but overrides) the current
      styling.

    - If you're going to be converting a lot of HTML it might be useful from
      a performance standpoint to re-use an existing :class:`HTMLConverter`
      object for unrelated HTML fragments, in this case take a look at the
      :func:`__call__()` method (it makes this use case very easy).

    .. versionadded:: 4.15
       :class:`humanfriendly.terminal.HTMLConverter` was added to the
       `humanfriendly` package during the initial development of my new
       `chat-archive <https://chat-archive.readthedocs.io/>`_ project, whose
       command line interface makes for a great demonstration of the
       flexibility that this feature provides (hint: check out how the search
       keyword highlighting combines with the regular highlighting).
    )divpprec                     |j                  dd      | _        |j                  dd      | _        t        j                  | g|i | y)a  
        Initialize an :class:`HTMLConverter` object.

        :param callback: Optional keyword argument to specify a function that
                         will be called to process text fragments before they
                         are emitted on the output stream. Note that link text
                         and preformatted text fragments are not processed by
                         this callback.
        :param output: Optional keyword argument to redirect the output to the
                       given file-like object. If this is not given a new
                       :class:`~python3:io.StringIO` object is created.
        r   Noutput)popr   r   r   __init__)selfargskws      r   r   zHTMLConverter.__init__{   s@     z40ffXt,D.4.2.r   c                     | j                          | j                  |       | j                          t        | j                  t
              r#t        | j                  j                               S y)a  
        Reset the parser, convert some HTML and get the text with ANSI escape sequences.

        :param data: The HTML to convert to text (a string).
        :returns: The converted text (only in case `output` is
                  a :class:`~python3:io.StringIO` object).
        N)resetfeedclose
isinstancer   r   r   getvaluer   r   s     r   __call__zHTMLConverter.__call__   sL     	

		$

dkk8,&t{{';';'=>> -r   c                 <    | j                   r| j                   d   S i S )z?Get the current style from the top of the stack (a dictionary).)stackr   s    r   current_stylezHTMLConverter.current_style   s     "&tzz"~33r   c                     t        | j                        r&| j                  j                  t               g | _        t        j                  |        y)ab  
        Close previously opened ANSI escape sequences.

        This method overrides the same method in the superclass to ensure that
        an :data:`.ANSI_RESET` code is emitted when parsing reaches the end of
        the input but a style is still active. This is intended to prevent
        malformed HTML from messing up terminal output.
        N)anyr(   r   writer	   r   r!   r)   s    r   r!   zHTMLConverter.close   s6     tzz?KKj)DJr   Nc                     | j                   j                  t               || j                  n|}|r%| j                   j                  t	        di |       yy)a)  
        Emit an ANSI escape sequence for the given or current style to the output stream.

        :param style: A dictionary with arguments for :func:`.ansi_style()` or
                      :data:`None`, in which case the style at the top of the
                      stack is emitted.
        N )r   r-   r	   r*   r
   )r   styles     r   
emit_stylezHTMLConverter.emit_style   sI     	*%&+m""KKj1512 r   c           	          | j                   j                  t        |j                  d      rt	        |dd d                   yt	        |                   y)z
        Process a decimal or hexadecimal numeric character reference.

        :param value: The decimal or hexadecimal value (a string).
        x   N   )r   r-   r   
startswithint)r   values     r   handle_charrefzHTMLConverter.handle_charref   sD     	&u7G7G7LU12Y!3]^RUV[R\]^r   c                     | j                   r|| _        n,| j                  r | j                  dk(  r| j                  |      }| j                  j                  |       y)zZ
        Process textual data.

        :param data: The decoded text (a string).
        r   N)link_url	link_textr   preformatted_text_levelr   r-   r$   s     r   handle_datazHTMLConverter.handle_data   sJ     == "DN]]t;;q@ ==&D$r   c                    |dv rD| j                   }| j                  r| j                  j                  d       | j                   }|dk(  r| j                  | j                  | j
                        r| j                  |       n| j                  |       | j                  j                  d       | j                  |       | j                  j                  | j                  | j
                               | j                  |       | j                  j                  d       n| j                  |       |dv r| xj                  dz  c_
        || j                  v r| j                  j                  d       y	y	)
zf
        Process the end of an HTML tag.

        :param tag: The name of the tag (a string).
        )abcodedelemiinsr   sstrongspanur'   r@   z ())rB   r   r4   

N)r*   r(   r   
urls_matchr<   r;   r1   r   r-   
render_urlr=   
BLOCK_TAGS)r   tag	old_style	new_styles       r   handle_endtagzHTMLConverter.handle_endtag   s    ``**I zz

r"**Icz??4>>4==AOOI.OOI.KK%%d+OOI.KK%%doodmm&DEOOI.KK%%c*	*o%,,1,$//!KKf% "r   c                 Z    | j                   j                  t        t        |                y)z|
        Process a named character reference.

        :param name: The name of the character reference (a string).
        N)r   r-   r   r   )r   names     r   handle_entityrefzHTMLConverter.handle_entityref   s      	&!567r   c                 "   || j                   v r| j                  j                  d       |dk(  r-| j                  ddd       t	        d |D        d      | _        y*|dk(  s|d	k(  r| j                  d
       y*|dk(  r| j                  j                  d       y*|dk(  s|dk(  r(| j                  d       | xj                  dz  c_        y*|dk(  s|dk(  r| j                  d       y*|dk(  s|dk(  r| j                  d       y*|dk(  s|dk(  r| j                  d       y*|dk(  ri }t	        d |D        d      }|j                  d      D ]  }|j                  d      \  }}}|j                         }|j                         }|dk(  r| j                  |      |d <   R|d!k(  r| j                  |      |d!<   l|d"k(  r|d#k(  rd|d#<   ||d$k(  r|d%k(  rd|d%<   |d&k(  r|d'k(  rd|d(<   |d&k(  s|d)k(  sd|d)<     | j                  d+i | y*y*),z
        Process the start of an HTML tag.

        :param tag: The name of the tag (a string).
        :param attrs: A list of tuples with two strings each.
        rL   r@   blueT)colorbright	underlinec              3   2   K   | ]  \  }}|d k(  s|  yw)hrefNr/   .0nvs      r   	<genexpr>z0HTMLConverter.handle_starttag.<locals>.<genexpr>  s     !E1f!    rA   rH   )boldbr
rB   r   yellow)rY   r4   rC   rG   )strike_throughrD   rE   )italicrF   rJ   )r[   rI   c              3   2   K   | ]  \  }}|d k(  s|  yw)r0   Nr/   r^   s      r   rb   z0HTMLConverter.handle_starttag.<locals>.<genexpr>   s     <edaqG|erc   ;:zbackground-color
backgroundrY   z
font-stylerj   zfont-weightre   ztext-decorationzline-throughri   r[   Nr/   )rO   r   r-   push_stylesnextr;   r=   split	partitionstripparse_color)	r   rP   attrsstylescssrulerU   _r8   s	            r   handle_starttagzHTMLConverter.handle_starttag   s&    $//!KKf%#:6$$G
 !!E!ErJDMCZ3(?$'D[KKd#F]cUl8,((A-(E\SCZD1D[C3JD)E\SCZt,F]F<e<bAC		#!%!4azz|--+/+;+;E+BF<(W_&*&6&6u&=F7O\)ex.?'+F8$]*u%)F6N..5N3J/3F+,..5K3G*.F;' '  D&v&' r   c                 0    t        j                  dd|      S )z
        Normalize a URL to enable string equality comparison.

        :param url: The URL to normalize (a string).
        :returns: The normalized URL (a string).
        ^mailto:rd   resubr   urls     r   normalize_urlzHTMLConverter.normalize_url3  s     vvj"c**r   c                    |j                  d      r=t        j                  d|      }t        |      dk(  rt	        t        t        |            S |j                  d      rr|dd }t        |      }|dk(  r,t        |dd d	      t        |dd
 d	      t        |d
d d	      fS |dk(  r,t        |d   d	      t        |d   d	      t        |d   d	      fS |j                         }|t        v r|S y)z
        Convert a CSS color to something that :func:`.ansi_style()` understands.

        :param value: A string like ``rgb(1,2,3)``, ``#AABBCC`` or ``yellow``.
        :returns: A color value supported by :func:`.ansi_style()` or :data:`None`.
        rgbz\d+   #r4   N      r5      r   )	r6   r~   findalllentuplemapr7   lowerr   )r   r8   tokenslengths       r   rt   zHTMLConverter.parse_color<  s    E"ZZ.F6{aSf-..c"!"IEZF{ bq	2&a
B'a
B' 
 1 a"%a"%a"%  $$L %r   c                     | j                   }|rt        |      }|j                  |       n|}| j                  j	                  |       | j                  |       y)a  
        Push new style information onto the stack.

        :param changes: Any keyword arguments are passed on to :func:`.ansi_style()`.

        This method is a helper for :func:`handle_starttag()`
        that does the following:

        1. Make a copy of the current styles (from the top of the stack),
        2. Apply the given `changes` to the copy of the current styles,
        3. Add the new styles to the stack,
        4. Emit the appropriate ANSI escape sequence to the output stream.
        N)r*   dictupdater(   appendr1   )r   changes	prototyperR   s       r   ro   zHTMLConverter.push_styles_  sL     &&	YIW%I

)$	"r   c                     t        j                  dd|      }t        j                  dd|      }t        j                  dd|      }|S )a  
        Prepare a URL for rendering on the terminal.

        :param url: The URL to simplify (a string).
        :returns: The simplified URL (a string).

        This method pre-processes a URL before rendering on the terminal. The
        following modifications are made:

        - The ``mailto:`` prefix is stripped.
        - Spaces are converted to ``%20``.
        - A trailing parenthesis is converted to ``%29``.
        r|   rd    z%20z\)$z%29r}   r   s     r   rN   zHTMLConverter.render_urlv  s?     ffZS)ffS%%ffVUC(
r   c                     t        j                  |        d| _        d| _        d| _        | j
                  t        | j
                  t              rt               | _        g | _        y)z
        Reset the state of the HTML parser and ANSI converter.

        When `output` is a :class:`~python3:io.StringIO` object a new
        instance will be created (and the old one garbage collected).
        Nr   )	r   r   r<   r;   r=   r   r"   r   r(   r)   s    r   r   zHTMLConverter.reset  sQ     	'($;;*T[[("C #*DK
r   c                 H    | j                  |      | j                  |      k(  S )a  
        Compare two URLs for equality using :func:`normalize_url()`.

        :param a: A string containing a URL.
        :param b: A string containing a URL.
        :returns: :data:`True` if the URLs are the same, :data:`False` otherwise.

        This method is used by :func:`handle_endtag()` to omit the URL of a
        hyperlink (``<a href="...">``) when the link text is that same URL.
        )r   )r   r@   rA   s      r   rM   zHTMLConverter.urls_match  s%     !!!$(:(:1(===r   N)__name__
__module____qualname____doc__rO   r   r%   propertyr*   r!   r1   r9   r>   rS   rV   rz   r   rt   ro   rN   r   rM   r/   r   r   r   r   %   sv    Ob %JE/&? 4 43_ $&@81'f+!F#.&&>r   r   r   )r   r~   humanfriendly.compatr   r   r   r   humanfriendly.textr   humanfriendly.terminalr   r	   r
   __all__r   r   r/   r   r   <module>r      s<    S 
 N M 2 K K , B>J B>r   