
    g<                         d dl Z d dlZd dlZd dlmZmZmZmZmZ d dl	Z	d dl
mZ d dlZd dlmZ d dlmZ d dlmZ d dlmZmZ d dlmZ d d	lmZ d d
lmZmZmZmZ  G d de      Z G d de      Zy)    N)AnyListOptionalTupleUnion)	BaseModel)
get_secret)verbose_proxy_logger)	DualCache)CustomGuardraillog_guardrail_information)UserAPIKeyAuth)GuardrailEventHooks)EmbeddingResponseImageResponseModelResponseStreamingChoicesc                   &    e Zd ZU dZdZee   ed<   y)PresidioPerRequestConfigzD
    presdio params that can be controlled per request, api key
    Nlanguage)__name__
__module____qualname____doc__r   r   str__annotations__     f/var/www/openai/venv/lib/python3.12/site-packages/litellm/proxy/guardrails/guardrail_hooks/presidio.pyr   r   %   s     #Hhsm"r   r   c                   |    e Zd ZdZdZ	 	 	 	 	 	 	 ddedee   dee   dee   dee   dee   dee   f fd	Z		 	 ddee   dee   fd
Z
dededee   dedef
dZededededefd       Zededededeeef   fd       Zededededeeef   fd       Zedededeeeef   fd       Zdedee   fdZd Z xZS )_OPTIONAL_PresidioPIIMaskingNmock_testingmock_redacted_textpresidio_analyzer_api_basepresidio_anonymizer_api_baseoutput_parse_piipresidio_ad_hoc_recognizerslogging_onlyc                 $   |du rd| _         t        j                   |d<   t        |   d
i | i | _        || _        |xs d| _        |du ry |}	|	0	 t        |	d      5 }
t        j                  |
      | _
        d d d        | j                  ||	       y # 1 sw Y   xY w# t        $ r t        d|	       t        j                  $ r}t        dt        |       d|	       d }~wt        $ r}t        dt        |       d|	       d }~ww xY w)NT
event_hookFrzFile not found. file_path=zError decoding JSON file: z, file_path=zAn error occurred: )r$   r%   r   )r(   r   super__init__
pii_tokensr#   r&   openjsonloadad_hoc_recognizersFileNotFoundError	ExceptionJSONDecodeErrorr   validate_environment)selfr"   r#   r$   r%   r&   r'   r(   kwargsr2   filee	__class__s               r   r-   z%_OPTIONAL_PresidioPIIMasking.__init__2   s<    4 $D#6#C#CF< "6" 	 #5 0 9E48),c2d.2iioD+ 3 	!!'A)E 	" 	
 32$ S"<=O<P QRR'' 0QEWDXY   )#a&>P=QR s<   B! B9B! BB! !)D
C$$D0D

Dc                    |xs t        dd       | _        |xs t        j                   dd       | _        | j                  t	        d      | j                  j                  d      s| xj                  dz  c_        | j                  j                  d      s/| j                  j                  d      sd| j                  z   | _        | j                  t	        d      | j                  j                  d      s| xj                  dz  c_        | j                  j                  d      s1| j                  j                  d      sd| j                  z   | _        y y y )NPRESIDIO_ANALYZER_API_BASEPRESIDIO_ANONYMIZER_API_BASEz5Missing `PRESIDIO_ANALYZER_API_BASE` from environment/zhttp://zhttps://z7Missing `PRESIDIO_ANONYMIZER_API_BASE` from environment)r	   r$   litellmr%   r4   endswith
startswith)r7   r$   r%   s      r   r6   z1_OPTIONAL_PresidioPIIMasking.validate_environment]   sK    'X*5QSW*X 	'
 ) 
G,>,>*D-
 	) **2STT..77<++s2+++66yA..99*E D;;; + ,,4UVV0099#>--4---88C00;;JG D=== - H Dr   textpresidio_configrequest_datareturnc                   K   	 t        j                         4 d{   }| j                  | j                  }n8| j                   d}|dd}|r|j                  r|j                  |d<   | j
                  | j
                  |d<   |j                  | j                  |             d}t        j                  d||       |j                  ||	      4 d{   }	|	j                          d{   }
ddd      d{    | j                   d
}t        j                  d|       |
d}|j                  ||	      4 d{   }	|	j                          d{   }ddd      d{    |}|t        j                  d|       |d   D ]k  }|d   }|d   }|d   }|d   dk(  rD|du r@|| j                  v r |t        t        j                                z   }||| | j                  |<   |d| |z   ||d z   }m |d   cddd      d{    S t#        d|       7 7 O7 :7 -# 1 d{  7  sw Y   >xY w7 7 7 # 1 d{  7  sw Y   xY w7 P# 1 d{  7  sw Y   yxY w# t"        $ r}|d}~ww xY ww)zO
        [TODO] make this more performant for high-throughput scenario
        Nanalyzeen)rC   r   r   r2   )rE   z&Making request to: %s with payload: %s)r0   	anonymizezMaking request to: %s)rC   analyzer_resultszredacted_text: %sitemsstartendrC   operatorreplaceTzInvalid anonymizer response: )aiohttpClientSessionr#   r$   r   r2   update)get_guardrail_dynamic_request_body_paramsr
   debugpostr0   r%   r.   r   uuiduuid4r4   )r7   rC   r&   rD   rE   sessionredacted_textanalyze_urlanalyze_payloadresponseanalyze_resultsanonymize_urlanonymize_payloadnew_textitemrM   rN   replacementr:   s                      r   	check_piiz&_OPTIONAL_PresidioPIIMasking.check_pii   s    B	,,..'**6$($;$;M &*%D%D$EW"MK/3&FO&?+C+C6E6N6N
3..:@D@W@W(<=#**FF)5 G 
 %)M(..@#'
  '||#/  ,    !08*?	    (,'H'H&I$SM(../FV $,;)%
  '||%,=  ,    !.6mmo(=   
   ,(../BMR -g 6 $W"5k&*6l
+y8=MQU=U  +doo=.9C

<M.M;C %c<DOOK8 $,FU#3k#AHSTN#R !7 )0{ /..~ $&CM?$STT /0  +@	         )>       M /...@  	G	s0  JI8 H$I8 B8I#H'I#H00H*1H05I# H-AI#II#
III#I#.I
/BI#I8 I!I8 JI#$I8 'I#*H0-I#0I	6H97I	>	I#I
I#I	II	I#!I8 #I5)I,*I51I8 4J5I8 8	JJJJuser_api_key_dictcachedata	call_typec           	      4  K   	 |j                  dd      }t        j                  d|       | j                  |      }|dk(  r|d   }g }|D ]G  }	t	        |	d   t
              s|j                  | j                  |	d   | j                  ||             I t        j                  |  d{   }
t        |
      D ]$  \  }}t	        ||   d   t
              s|||   d<   & t        j                  d|d           ||d<   |S 7 X# t        $ r}|d}~ww xY ww)	a  
        - Check if request turned off pii
            - Check if user allowed to turn off pii (key permissions -> 'allow_pii_controls')

        - Take the request data
        - Call /analyze -> get the results
        - Call /anonymize w/ the analyze results -> get the redacted text

        For multiple messages in /chat/completions, we'll need to call them in parallel.
        content_safetyNzcontent_safety: %s
completionmessagescontentrC   r&   rD   rE   ,Presidio PII Masking: Redacted pii message: )getr
   rU   'get_presidio_settings_from_request_data
isinstancer   appendrd   r&   asynciogather	enumerateinfor4   )r7   re   rf   rg   rh   rj   rD   rl   tasksm	responsesindexr+   r:   s                 r   async_pre_call_hookz0_OPTIONAL_PresidioPIIMasking.async_pre_call_hook   s:    & 	!XX&6=N &&';^L"JJ4POL(
+!A!!I,4 NN%&y\151F1F0?-1	 +  " #*..%"88	 )) 4HE1!(5/)"<cB  !% !5
 %))B4
CSBTU $,Z K 9  	G	sH   DAD "AD *D++D +D DD 	DDDDr8   resultc                      ddl m}  fd}	 t        j                         } |d      5 }|j	                  |      }|j                         cd d d        S # 1 sw Y   y xY w# t        $ r
  |       cY S w xY w)Nr   )ThreadPoolExecutorc                  <   t        j                         } 	 t        j                  |        | j                  j	                              | j                          t        j                  d       S # | j                          t        j                  d       w xY w)z9Run the coroutine in a new event loop within this thread.)r8   r}   rh   N)rt   new_event_loopset_event_looprun_until_completeasync_logging_hookclose)new_looprh   r8   r}   r7   s    r   run_in_new_loopzB_OPTIONAL_PresidioPIIMasking.logging_hook.<locals>.run_in_new_loop  s    --/H	-&&x022++%f	 ,   &&t,  &&t,s   7A4 4'B   )max_workers)concurrent.futuresr   rt   get_running_loopsubmitr}   RuntimeError)	r7   r8   r}   rh   r   r   _executorfutures	   ````     r   logging_hookz)_OPTIONAL_PresidioPIIMasking.logging_hook  sh     	:	-	%((*A $2h!9}} 322  	%"$$	%s.   A( !A	A( A%!A( %A( (A;:A;c           	        K   |dk(  s|dk(  r|j                  dd      }g }|||fS | j                  |      }|D ]G  }d}|d   t        |d   t              s|d   }|j	                  | j                  |d||             I t        j                  |  d{   }	t        |	      D ]$  \  }
}t        ||
   d   t              s|||
   d<   & t        j                  d	|        ||d<   ||fS 7 Ww)
zK
        Masks the input before logging to langfuse, datadog, etc.
        rk   acompletionrl   N rm   Frn   ro   )rp   rq   rr   r   rs   rd   rt   ru   rv   r
   rw   )r7   r8   r}   rh   rl   rx   rD   ry   text_strrz   r{   r+   s               r   r   z/_OPTIONAL_PresidioPIIMasking.async_logging_hook)  s,     %m)C'-zz*d'CHEv~%"JJ6ROY<'a	lC0 |HLL!)-2,;)/	 '   &nne44I%i0qhuoi8#>  UO! 1
 !%%>xjI "*F:v~ 5s   AC3A C3C1+C3,C3r]   c                   K   t        j                  d| j                   dt        |              | j                  du rt        j                  du r|S t        |t              rt        |j                  d   t              st        |j                  d   j                  j                  t              rt        j                  d| j                   d|j                  d   j                  j                          | j                  j                         D ]T  \  }}|j                  d   j                  j                  j                  ||      |j                  d   j                  _
        V |S w)ze
        Output parse the response object to replace the masked tokens with user sent values
        z(PII Masking Args: self.output_parse_pii=z; type of response=Fr   zself.pii_tokens: z; initial response: )r
   rU   r&   typer@   rr   r   choicesr   messagerm   r   r.   rL   rP   )r7   rg   re   r]   keyvalues         r   async_post_call_success_hookz9_OPTIONAL_PresidioPIIMasking.async_post_call_success_hookV  s<     	""6t7L7L6MM`aefnao`pq	
   E)g.F.F%.OOh.zQ!18
 (**1-55==sC$**''88LXM]M]^_M`MhMhMpMpLqr #'//"7"7"9JC:B:J:J;gggggc5&9 $$Q'//7 #: s   E)E+c                 V    d|v r%|d   }|j                  d      }|rt        di |}|S y )Nmetadataguardrail_configr   )rp   r   )r7   rg   	_metadata_guardrail_config_presidio_configs        r   rq   zD_OPTIONAL_PresidioPIIMasking.get_presidio_settings_from_request_datat  sA     Z(I ).@ A #;#P>O#P ''r   c                     	 t        j                  |       t        j                  rt	        |       y y # t
        $ r Y y w xY w)N)r
   rU   r@   set_verboseprintr4   )r7   print_statements     r   print_verbosez*_OPTIONAL_PresidioPIIMasking.print_verbose  s=    	 &&7""o& # 		s   04 	A A )FNNNFNN)NN)r   r   r   user_api_key_cacher2   boolr   dictr   r-   r6   r   rd   r   r   r   r|   r   r   r   r   r   r   r   r   r   rq   r   __classcell__)r;   s   @r   r!   r!   -   s   
 #-1486:+059'+)
)
 %TN)
 %-SM	)

 '/sm)
 #4.)
 &.c])
 tn)
Z 596:&$,SM& '/sm&PLL L "":;	L
 L 
L\ 2)2 2 	2
 2 2h %%$'%47%	tSy	% %@ **$'*47*	tSy	* *X  * '8-GH	 :

	*	+
r   r!   ) rt   r0   rW   typingr   r   r   r   r   rQ   pydanticr   r@   r	   litellm._loggingr
   litellm.caching.cachingr   %litellm.integrations.custom_guardrailr   r   litellm.proxy._typesr   litellm.types.guardrailsr   litellm.utilsr   r   r   r   r   r!   r   r   r   <module>r      sY       4 4     1 - 0 8 #y #Y? Yr   