
    gE              
         d Z ddlmZ ddlZddlZddlZddlZddlmZ dZ	dZ
dZ ej                  d	      Z ej                  d
j                  ddddd            Z ej                  dj                  ddd            Z ej                  d       ej                  d       ej                  d       ej                  d       ej                  d       ej                  d       ej                  d       ej                  d      dZ ej                  d      Z ej                  d      Z G d dej(                        Zd$d%dZd&dZd'dZd(dZd)dZd*d Zd+d,d!Zd+d,d"Zd-d#Zy).a  
An implementation of `urlparse` that provides URL validation and normalization
as described by RFC3986.

We rely on this implementation rather than the one in Python's stdlib, because:

* It provides more complete URL validation.
* It properly differentiates between an empty querystring and an absent querystring,
  to distinguish URLs with a trailing '?'.
* It handles scheme, hostname, port, and path normalization.
* It supports IDNA hostnames, normalizing them to their encoded form.
* The API supports passing individual components, as well as the complete URL string.

Previously we relied on the excellent `rfc3986` package to handle URL parsing and
validation, but this module provides a simpler alternative, with less indirection
required.
    )annotationsN   )
InvalidURLi   zBABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~z!$&'()*+,;=z%[A-Fa-f0-9]{2}z(?:(?P<scheme>{scheme}):)?(?://(?P<authority>{authority}))?(?P<path>{path})(?:\?(?P<query>{query}))?(?:#(?P<fragment>{fragment}))?z([a-zA-Z][a-zA-Z0-9+.-]*)?z[^/?#]*z[^?#]*z[^#]*z.*scheme	authoritypathqueryfragmentzA(?:(?P<userinfo>{userinfo})@)?(?P<host>{host}):?(?P<port>{port})?z(\[.*\]|[^:@]*))userinfohostportz[^@]*z(\[.*\]|[^:]*))r   r   r	   r
   r   r   r   r   z ^[0-9]+\.[0-9]+\.[0-9]+\.[0-9]+$z^\[.*\]$c                      e Zd ZU ded<   ded<   ded<   ded<   ded<   ded	<   ded
<   edd       Zedd       ZddZddZy)ParseResultstrr   r   r   
int | Noner   r	   
str | Noner
   r   c                    dj                  | j                  r| j                   dndd| j                  v rd| j                   dn| j                  | j                  d| j                   g      S dg      S )N @:[])joinr   r   r   selfs    D/var/www/openai/venv/lib/python3.12/site-packages/httpx/_urlparse.pyr   zParseResult.authorityj   sz    ww'+}}4==/#"$'499$4!DII;a $))#'99#8!DII;
 	
 ?A
 	
    c                    dj                  d| j                  v rd| j                   dn| j                  | j                  d| j                   g      S dg      S )Nr   r   r   r   )r   r   r   r   s    r   netloczParseResult.netloct   sd    ww$'499$4!DII;a $))#'99#8!DII;
 	
 ?A
 	
r   c                    |s| S | j                   | j                  | j                  | j                  | j                  d}|j                  |       t        di |S )Nr   r   )r   r   r	   r
   r   updateurlparse)r   kwargsdefaultss      r   	copy_withzParseResult.copy_with}   sR    K kkIIZZ
 	'h''r   c                   | j                   }dj                  | j                  r| j                   dnd|rd| nd| j                  | j                  d| j                   nd| j
                  d| j
                   g      S dg      S )Nr   r   //?#)r   r   r   r	   r
   r   )r   r   s     r   __str__zParseResult.__str__   s    NN	ww%)[[4;;-q!b$-"YK 2		$(JJ$:!DJJ< '+}}'@!DMM?#
 	
 GI
 	
r   N)returnr   )r%   r   r-   r   )	__name__
__module____qualname____annotations__propertyr   r    r'   r,    r   r   r   r   a   sX    KM
I

I
 
 
 
(

r   r   c           	     N   t        |       t        kD  rt        d      t        d | D              r7t	        d | D              }| j                  |      }d|d| d}t        |      d|v r%|d   }t        |t              rt        |      n||d<   d|v r0|j                  d      xs d	}|j                  d
      \  |d<   }|d<   d|v sd|v rLt        |j                  dd	      xs d	      }t        |j                  dd	      xs d	      }	|	r| d
|	 n||d<   d|v r7|j                  d      xs d	}
|
j                  d      \  |d<   }|d<   |sd |d<   d|v rD|j                  d      xs d	}d
|v r+|j                  d      r|j                  d      s	d| d|d<   |j                         D ]  \  }}|	t        |      t        kD  rt        d| d      t        d |D              r:t	        d |D              }|j                  |      }d| d|d| d}t        |      t         |   j#                  |      rt        d| d       t$        j'                  |       }|J |j)                         }|j                  d|d         xs d	}|j                  d|d         xs d	}|j                  d|d         xs d	}|j                  d|d         }|j                  d|d         }t*        j'                  |      }|J |j)                         }|j                  d|d         xs d	}|j                  d|d         xs d	}|j                  d|d         }|j-                         }t        |t.        d
z          }t1        |      }t3        ||      }|d	k7  }|d	k7  xs |d	k7  xs |d u}t5        |||!       |s|rt7        |      }d"}t        |t.        |z   d#z          } |d nt        |t.        |z   d$z          }!|d nt        |t.        |z   d%z          }"t9        ||||| |!|"      S )&NzURL too longc              3  b   K   | ]'  }|j                         xr |j                           ) y wNisasciiisprintable.0chars     r   	<genexpr>zurlparse.<locals>.<genexpr>   s+     
E4<<>4$"2"2"444   -/c              3  d   K   | ](  }|j                         s|j                         r%| * y wr6   r7   r:   s     r   r=   zurlparse.<locals>.<genexpr>   s$     VSTDLLN4CSCSCUDS   000z.Invalid non-printable ASCII character in URL, z at position .r   r    r   r   r   usernamepasswordr   raw_pathr*   r	   r
   r   r   zURL component 'z
' too longc              3  b   K   | ]'  }|j                         xr |j                           ) y wr6   r7   r:   s     r   r=   zurlparse.<locals>.<genexpr>   s+     O4<<><$*:*:*<&<<r>   c              3  d   K   | ](  }|j                         s|j                         r%| * y wr6   r7   r:   s     r   r=   zurlparse.<locals>.<genexpr>   s'      %*TdllnTEUEUEWDUr@   z-Invalid non-printable ASCII character in URL z component, zInvalid URL component ''r   r   r   safe)
has_schemehas_authorityz`{}%|^\"z:/[]@z:/?[]@z:/?#[]@)lenMAX_URL_LENGTHr   anynextfind
isinstanceintr   pop	partitionquoteget
startswithendswithitemsCOMPONENT_REGEX	fullmatch	URL_REGEXmatch	groupdictAUTHORITY_REGEXlower
SUB_DELIMSencode_hostnormalize_portvalidate_pathnormalize_pathr   )#urlr%   r<   idxerrorr   r    _rB   rC   rD   	seperatorr   keyvalue	url_matchurl_dictr   r   r	   r
   r   authority_matchauthority_dictr   parsed_schemeparsed_userinfoparsed_hostparsed_portrJ   rK   WHATWG_SAFEparsed_pathparsed_queryparsed_fragments#                                      r   r$   r$      s   
 3x. (( 
E
EEVSVVhhtn<THMRUQVVWX 	  f~&0s&;Tv 6H%+,2,<,<S,A)v6&> VzV3J39r:J39r:9Az8*5xz V::j)/R5=5G5G5L2v	6'?"F7O zz&!'R$; 4s9K a[F6N
 lln
U5zN* ?3%z!BCC OOO %*  jj&CC5hmC53  !'' #3'11%8 #:3%q!ABB) %. $I   ""$H ZZ(8"45;F

;(=>D"I::fhv./52DJJw 12Ezz*hz&:;H &++I6O&&&$..0N zz*nZ&@AGRH::fnV45;D::fnV45D
  M 
S0@AO"4(K,T6:K"$J2M!2Mk6M  $:]K]d#
 K T
[(@7(JKK = 	5zK7(BC   	8*{":Y"FG   r   c                t   | syt         j                  |       r	 t        j                  |        | S t        j                  |       r	 t        j                  | dd        | dd S | j                         r$d}t        | j                         t        |z         S 	 t        j                  | j                               j                  d      S # t        j                  $ r t        d|       w xY w# t        j                  $ r t        d|       w xY w# t        j                  $ r t        d	|       w xY w)
Nr   zInvalid IPv4 address: r   zInvalid IPv6 address: z"`{}%|\rH   asciizInvalid IDNA hostname: )IPv4_STYLE_HOSTNAMEr]   	ipaddressIPv4AddressAddressValueErrorr   IPv6_STYLE_HOSTNAMEIPv6Addressr8   rU   r`   ra   idnaencodedecode	IDNAError)r   ru   s     r   rb   rb   /  s2   		"	"4	(	@!!$' 		"	"4	(	@!!$q*- Abz	 !TZZ\
[(@AA={{4::<(//88= ** 	@5dX>??	@ ** 	@5dX>??	@  >> =24(;<<=s#   C C0 1D "C-0"D"D7c                    | | dk(  ry 	 t        |       }ddddddj                  |      }||k(  ry |S # t        $ r t        d|       w xY w)Nr   zInvalid port:    P   i  )ftphttphttpswswss)rR   
ValueErrorr   rV   )r   r   port_as_intdefault_ports       r   rc   rc   ^  sv     |trz4$i
 rCr#NRRL l"  4>$2334s	   4 Ac                    |r| r| j                  d      st        d      |s<|s9| j                  d      rt        d      | j                  d      rt        d      yyy)z
    Path validation rules that depend on if the URL contains
    a scheme or authority component.

    See https://datatracker.ietf.org/doc/html/rfc3986.html#section-3.3
    /z7For absolute URLs, path must be empty or begin with '/'r)   z3Relative URLs cannot have a path starting with '//'r   z2Relative URLs cannot have a path starting with ':'N)rW   r   )r	   rJ   rK   s      r   rd   rd   y  sh      ,VWWm ??4 RSS ??3QRR   ,:r   c                    d| vr| S | j                  d      }d|vrd|vr| S g }|D ]9  }|dk(  r	|dk(  r|s|dgk7  s|j                          )|j                  |       ; dj                  |      S )z
    Drop "." and ".." segments from a URL path.

    For example:

        normalize_path("/path/./to/somewhere/..") == "/path/to"
    rA   r   z..r   )splitrS   appendr   )r	   
componentsoutput	components       r   re   re     s     $CJ *Z!7 F	$&RD.

MM)$   88Fr   c                p    dj                  | j                  d      D cg c]  }d|d
 c}      S c c}w )Nr   zutf-8%02X)r   r   )stringbytes     r   PERCENTr     s6    77v}}W/EF/EtaSzN/EFGGFs   3c                    t         |z   }| j                  |      s| S dj                  | D cg c]  }||v r|n
t        |       c}      S c c}w )z1
    Use percent-encoding to quote a string.
    r   )UNRESERVED_CHARACTERSrstripr   r   )r   rI   NON_ESCAPED_CHARSr<   s       r   percent_encodedr     sZ     .4 ==*+77JPQ&$**	=&Q Qs   Ac                   g }d}t        j                  t        |       D ]l  }|j                         |j	                         }}|j                  d      }||k7  r!| || }|j                  t        ||             |j                  |       |}n |t        |       k7  r!| |d }	|j                  t        |	|             dj                  |      S )a  
    Use percent-encoding to quote a string, omitting existing '%xx' escape sequences.

    See: https://www.rfc-editor.org/rfc/rfc3986#section-2.1

    * `string`: The string to be percent-escaped.
    * `safe`: A string containing characters that may be treated as safe, and do not
        need to be escaped. Unreserved characters are always treated as safe.
        See: https://www.rfc-editor.org/rfc/rfc3986#section-2.3
    r   rH   Nr   )
refinditerPERCENT_ENCODED_REGEXstartendgroupr   r   rL   r   )
r   rI   partscurrent_positionr]   start_positionend_positionmatched_textleading_texttrailing_texts
             r   rU   rU     s     E2F;',{{}eiik{{1~--!"2>BLLLDAB 	\"' < 3v;&/01_]>?775>r   c                    dj                  | D cg c]#  \  }}t        |d      dz   t        |d      z   % c}}      S c c}}w )am  
    We can use a much simpler version of the stdlib urlencode here because
    we don't need to handle a bunch of different typing cases, such as bytes vs str.

    https://github.com/python/cpython/blob/b2f7b2ef0b5421e01efb8c7bee2ef95d3bab77eb/Lib/urllib/parse.py#L926

    Note that we use '%20' encoding for spaces. and '%2F  for '/'.
    This is slightly different than `requests`, but is the behaviour that browsers use.

    See
    - https://github.com/encode/httpx/issues/2536
    - https://github.com/encode/httpx/issues/2721
    - https://docs.python.org/3/library/urllib.parse.html#urllib.parse.urlencode
    &r   rH   =)r   r   )rY   kvs      r   	urlencoder     sS     88 	
1 AB'#-0KK	
 	
s   (A
r"   )rf   r   r%   r   r-   r   )r   r   r-   r   )r   zstr | int | Noner   r   r-   r   )r	   r   rJ   boolrK   r   r-   None)r	   r   r-   r   )r   r   r-   r   )r   )r   r   rI   r   r-   r   )rY   zlist[tuple[str, str]]r-   r   )__doc__
__future__r   r}   r   typingr   _exceptionsr   rM   r   ra   compiler   formatr\   r_   rZ   r|   r   
NamedTupler   r$   rb   rc   rd   re   r   r   rU   r   r3   r   r   <module>r      s  $ #  	   # I  
"

#45  BJJ	*
 f+  	& "**Tf 	  	  bjj56I&BJJx RZZ 

4 

7#BJJ)*BJJt	 !bjj!DE  bjj- 4
&## 4
nTn,=^6S2>HBr   