
    gg                         d dl Z d dlmZmZmZmZmZmZ d dlZd dl	Z	d dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZmZ er	d dlmZ eZneZ G d de      Z G d de      Z y)    N)TYPE_CHECKINGAnyDictListOptionalUnion)token_counter)verbose_loggerverbose_router_logger)	DualCache)CustomLogger)!_get_parent_otel_span_from_kwargs)RouterErrors)LiteLLMPydanticObjectBaseStandardLoggingPayload)get_utc_datetimeprint_verbose)Spanc                       e Zd ZU dZeed<   y)RoutingArgs<   ttlN)__name__
__module____qualname__r   int__annotations__     ^/var/www/openai/venv/lib/python3.12/site-packages/litellm/router_strategy/lowest_tpm_rpm_v2.pyr   r      s    Cr   r   c                      e Zd ZU dZdZeed<   dZeed<   dZ	eed<   dZ
eed<   i fd	ed
edefdZdedee   fdZdedee   dee   fdZd Zd Zdee   dededefdZ	 	 d$dedededee   dedee   deeeeef         d eeeef      dee   fd!Z	 	 d$dededeeeeef         d eeeef      fd"Z	 	 	 d%dededeeeeef         d eeeef      dee   f
d#Zy)&LowestTPMLoggingHandler_v2z
    Updated version of TPM/RPM Logging.

    Meant to work across instances.

    Caches individual models, not model_groups

    Uses batch get (redis.mget)

    Increments tpm/rpm limit using redis.incr
    F	test_flagr   logged_successlogged_failurei  default_cache_time_secondsrouter_cache
model_listrouting_argsc                 @    || _         || _        t        di || _        y )Nr   )r'   r(   r   r)   )selfr'   r(   r)   s       r    __init__z#LowestTPMLoggingHandler_v2.__init__0   s#     )$'7,7r   
deploymentreturnc                    	 t               }|j                  d      }|j                  di       j                  d      }| d| }| j                  j	                  |d      }d}||j                  d      }|!|j                  d	i       j                  d      }|!|j                  di       j                  d      }|t        d
      }|||k\  rt        j                  dj                  ||      d|j                  d	i       j                  d      t        j                  ddj                  t        j                  j                  ||||j                  dd            t        j                  dd                  | j                  j                  |d| j                   j"                        }|||kD  rt        j                  dj                  ||      d|j                  d	i       j                  d      t        j                  ddj                  t        j                  j                  ||      t        j                  dd                  |S # t$        $ r(}	t'        |	t        j                        r|	|cY d}	~	S d}	~	ww xY w)z
        Pre-call check + update model rpm

        Returns - deployment

        Raises - RateLimitError if deployment over defined RPM limit
        %H-%M
model_infoid:rpm:Tkey
local_onlyNrpmlitellm_paramsinf6Deployment over defined rpm limit={}. current usage={} model  zr{} rpm limit={}. current usage={}. id={}, model_group={}. Get the model info by calling 'router.get_model_info(id)
model_nametpm_rpm_limits"https://github.com/BerriAI/litellmmethodurl)status_codecontentrequestmessagellm_providerr<   response   r5   valuer   !{} rpm limit={}. current usage={})r   strftimegetr'   	get_cachefloatlitellmRateLimitErrorformathttpxResponser   user_defined_ratelimit_errorrM   Requestincrement_cacher)   r   	Exception
isinstance)
r+   r-   dtcurrent_minutemodel_idrpm_keylocal_resultdeployment_rpmresultes
             r    pre_call_checkz)LowestTPMLoggingHandler_v2.pre_call_check7   s   D	
 "#B[[1N!~~lB7;;DAH!
%'78G,,66 7 L "N%!+!6%!+0@"!E!I!I%!P%!+b!A!E!Ee!L%!&u'LN,J,,T[[& "$$..)92>BB7K"^^$' !U  !\  !\(EEKK*($&NN<<! !&5EKo p
 ( **::qd.?.?.C.C ;  %&>*A!00 X _ _*F! &((nn-=rBFFwO!&(+$G$N$N , I I O O . &%
 %*MM9IOs$t"    	!W334	s   II 	JJ<JJparent_otel_spanc                 T  K   	 t               }|j                  d      }|j                  di       j                  d      }| d| }| j                  j	                  |d       d{   }d}||j                  d      }|!|j                  d	i       j                  d      }|!|j                  di       j                  d      }|t        d
      }|||k\  rt        j                  dj                  ||      d|j                  d	i       j                  d      t        j                  ddj                  t        j                  j                  ||      dt        d      it        j                  dd                  | j                  j!                  |d| j"                  j$                  |       d{   }	|	|	|kD  rt        j                  dj                  ||	      d|j                  d	i       j                  d      t        j                  ddj                  t        j                  j                  ||	      dt        d      it        j                  dd                  |S 7 7 # t&        $ r(}
t)        |
t        j                        r|
|cY d}
~
S d}
~
ww xY ww)aI  
        Pre-call check + update model rpm
        - Used inside semaphore
        - raise rate limit error if deployment over limit

        Why? solves concurrency issue - https://github.com/BerriAI/litellm/issues/2994

        Returns - deployment

        Raises - RateLimitError if deployment over defined RPM limit
        r0   r1   r2   r3   Tr4   Nr7   r8   r9   r:   r;   r<   r=   rN   retry-afterr   r?   r@   rA   rD   rE   headersrF   rG   rK   r5   rM   r   rf   )r   rO   rP   r'   async_get_cacherR   rS   rT   rU   rV   rW   r   rX   rM   strrY   async_increment_cacher)   r   r[   r\   )r+   r-   rf   r]   r^   r_   r`   ra   rb   rc   rd   s              r    async_pre_call_checkz/LowestTPMLoggingHandler_v2.async_pre_call_check   s    G	
 "#B[[1N!~~lB7;;DAH!
%'78G!%!2!2!B!B "C " L "N%!+!6%!+0@"!E!I!I%!P%!+b!A!E!Ee!L%!&u'LN,J,,T[[& "$$..)92>BB7K"^^$' C J J(EEKK*(!
 "/B 8 %5EKo p	 &  $00FF))--%5	  G    %&>*A!00 X _ _*F! &((nn-=rBFFwO!&(+$G$N$N , I I O O . &%
 &3CG$<$)MM9IOs$t	" $ uB4  	!W334	s[   J(A#I4 'I/(EI4 9I2:B4I4 .J(/I4 2I4 4	J%=J J%J( J%%J(c                 \   	 	 |j                  d      }|t        d      |j                  d      }|j                  d      }||y t        |t              rt	        |      }|j                  d      }t               }	|	j                  d      }
| d|
 }| j                  j                  ||| j                  j                         | j                  r| xj                  d	z  c_        y y # t        $ r7}t        j                  d
j!                  t	        |                   Y d }~y d }~ww xY w)Nstandard_logging_object&standard_logging_object not passed in.model_groupr_   total_tokensr0   :tpm:rL   rK   zUlitellm.proxy.hooks.lowest_tpm_rpm_v2.py::log_success_event(): Exception occured - {})rP   
ValueErrorr\   r   rm   r   rO   r'   rZ   r)   r   r#   r$   r[   r
   	exceptionrU   )r+   kwargsresponse_obj
start_timeend_timerq   rs   r2   rt   r]   r^   tpm_keyrd   s                r    log_success_eventz,LowestTPMLoggingHandler_v2.log_success_event   s:   -	 IO

)I# '. !IJJ155mDK(,,Z8B"bjB$W266~FL
 "#B[[N E.!12G --<T5F5F5J5J .  ~~##q(#  	$$gnnF
 	s   AC+ B!C+ +	D+4-D&&D+c                   K   	 	 |j                  d      }|t        d      |j                  d      }|j                  d      }||y t        |t              rt	        |      }|j                  d      }t               }	|	j                  d      }
| d|
 }t        |      }| j                  j                  ||| j                  j                  |       d {    | j                  r| xj                  d	z  c_        y y 7 '# t        $ r7}t        j                   d
j#                  t	        |                   Y d }~y d }~ww xY ww)Nrq   rr   rs   r_   rt   r0   ru   rk   rK   z[litellm.proxy.hooks.lowest_tpm_rpm_v2.py::async_log_success_event(): Exception occured - {})rP   rv   r\   r   rm   r   rO   r   r'   rn   r)   r   r#   r$   r[   r
   rw   rU   )r+   rx   ry   rz   r{   rq   rs   r2   rt   r]   r^   r|   rf   rd   s                 r    async_log_success_eventz2LowestTPMLoggingHandler_v2.async_log_success_event  s_    /	 IO

)I# '. !IJJ155mDK(,,Z8B"bjB$W266~FL "#B[[N E.!12G
  AH##99"%%))!1	 :    ~~##q(#   	$$mttF
 	sN   EAD 	E
BD D%D ?ED 	E-D>9E>EEhealthy_deploymentsall_deploymentsinput_tokensrpm_dictc                    t        d      }g }|j                         D ]M  \  }}d }	|j                  d      d   }|D ]  }
||
d   d   k(  s|
}	 |	5|8d }||	j                  d      }|!|	j                  di       j                  d      }|!|	j                  di       j                  d      }|t        d      }d }||	j                  d      }|!|	j                  di       j                  d      }|!|	j                  di       j                  d      }|t        d      }||z   |kD  r|||v r||   ||   d	z   |k\  r*||k(  r|j	                  |	       B||k  sI|}|	g}P |S )
Nr9   :r   r1   r2   tpmr8   r7   rK   )rR   itemssplitrP   append)r+   r   r   r   r   
lowest_tpmpotential_deploymentsitemitem_tpm_deploymentm_deployment_tpm_deployment_rpms                r    _return_potential_deploymentsz8LowestTPMLoggingHandler_v2._return_potential_deployments>  s    5\
 "-335ND(K::c?1%D(1\?400"#K ) "!"O&"-//%"8&"-//2BB"G"K"KE"R&"-//,"C"G"G"N&"',"O&"-//%"8&"-//2BB"G"K"KE"R&"-//,"C"G"G"N&"',,&8%$(*:TN.d^a'?:Z'%,,[9J&%
)4%W 6X %$r   Nrs   tpm_keys
tpm_valuesrpm_keys
rpm_valuesmessagesinputc	                 8   ||yi }	t        |      D ]"  \  }
}||
   |	||
   j                  d      d   <   $ i }t        |      D ]"  \  }
}||
   |||
   j                  d      d   <   $ 	 t        ||      }t	        j
                  d|        |	i }	|D ]  }d|	|d   d   <    n|D ]  }|d   d   }||	vs|	|   d|	|<    |	}| j                  ||||      }t        d	       t        |      dkD  rt        j                  |      S y# t        $ r d}Y w xY w)
za
        Common checks for get available deployment, across sync + async implementations
        Nr   r   )r   textzinput_tokens=r1   r2   )r   r   r   r   z+returning picked lowest tpm/rpm deployment.)	enumerater   r	   r[   r   debugr   r   lenrandomchoice)r+   rs   r   r   r   r   r   r   r   tpm_dictidxr5   r   r   r-   dr|   r   r   s                      r    #_common_checks_available_deploymentz>LowestTPMLoggingHandler_v2._common_checks_available_deploymentu  s}    !3!(+HC4>sOHXc]((-a01 , !(+HC4>sOHXc]((-a01 ,	((GL 	##mL>$BC
 H1
;<L1$78 2 )L/$/(*hw.?.G()HW%	 ) # $ B B 3+%	 !C !
 	CD$%)==!677?  	L	s   +D DDc                   K   t        j                  d| d|        t               }|j                  d      }g }g }|D ]z  }	t	        |	t
              s|	j                  di       j                  d      }
dj                  |
|      }dj                  |
|      }|j                  |       |j                  |       | ||z   }| j                  j                  |       d	{   }||d	t        |       }|t        |      d	 }nd	}d	}| j                  ||||||||
      }	 |J |S 7 G# t        $ r i }t        |      D ]*  \  }}t	        |t
              s|j                  di       j                  d      }
d	}||j                  dd	      }|"|j                  di       j                  dd	      }|"|j                  di       j                  dd	      }|t        d      }|r||   nd}d	}||j                  dd	      }|"|j                  di       j                  dd	      }|"|j                  di       j                  dd	      }|t        d      }|r||   nd}||||d||
<   - t!        j"                  t$        j&                  j(                   d| d| d|t+        j,                  dddt/        d      it+        j0                  dd                  w xY ww)zz
        Async implementation of get deployments.

        Reduces time to retrieve the tpm/rpm values from cache
        6get_available_deployments - Usage Based. model_group: , healthy_deployments: r0   r1   r2   	{}:tpm:{}	{}:rpm:{})keysNrs   r   r   r   r   r   r   r   r   r8   r9   r   r7   current_tpm	tpm_limitcurrent_rpm	rpm_limitz. 12345 Passed model=. Deployments=r;   r=   rh   r   r?   r@   rA   ri   rG   )r   r   r   rO   r\   dictrP   rU   r   r'   async_batch_get_cacher   r   r[   r   rR   rS   rT   r   no_deployments_availablerM   rV   rW   rm   rY   )r+   rs   r   r   r   r]   r^   r   r   r   r2   r|   r`   combined_tpm_rpm_keyscombined_tpm_rpm_valuesr   r   r-   deployment_dictindexr   r   r   r   r   s                            r    async_get_available_deploymentsz:LowestTPMLoggingHandler_v2.async_get_available_deployments  sj     	##D[MQhi|h}~	
 W-$A!T"UU<,00 &,,R@%,,R@(( % !)8 3(,(9(9(O(O& )P )
 #
 #.03x=AJ0XAJJJ==# 3!! > 	

=	)))1#
2  :	 O&/0C&D"{k40$r:>>tDB&*O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK '+O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK (3%4'2%4	+OB'O 'EZ (('@@FFGG\]h\iiw  yH  xI  J! #*CG4!MM1AGkl		
 
a:	s9   AK#BK# D(!A K#"D* 'K#*.K FK  K#c           
         t        j                  d| d|        t               }|j                  d      }g }g }	|D ]z  }
t	        |
t
              s|
j                  di       j                  d      }dj                  ||      }dj                  ||      }|j                  |       |	j                  |       | | j                  j                  ||      }| j                  j                  |	|      }| j                  |||||	|||	      }	 |J |S # t        $ rg i }t        |      D ]*  \  }}t	        |t
              s|j                  di       j                  d      }d
}||j                  dd
      }|"|j                  di       j                  dd
      }|"|j                  di       j                  dd
      }|t        d      }|r||   nd}d
}||j                  dd
      }|"|j                  di       j                  dd
      }|"|j                  di       j                  dd
      }|t        d      }|r||   nd}||||d||<   - t        t         j"                  j$                   d| d|       w xY w)zE
        Returns a deployment with the lowest TPM/RPM usage.
        r   r   r0   r1   r2   r   r   )r   rf   r   Nr   r8   r9   r   r7   r   z. Passed model=r   )r   r   r   rO   r\   r   rP   rU   r   r'   batch_get_cacher   r[   r   rR   rv   r   r   rM   )r+   rs   r   r   r   rf   r]   r^   r   r   r   r2   r|   r`   r   r   r-   r   r   r   r   r   r   r   s                           r    get_available_deploymentsz4LowestTPMLoggingHandler_v2.get_available_deployments*  s    	##D[MQhi|h}~	
 W-$A!T"UU<,00 &,,R@%,,R@(( % &&66,< 7 

 &&66,< 7 

 ==# 3!! > 	

5	))) 2	 O&/0C&D"{k40$r:>>tDB&*O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK '+O&.*5//%*F&.*5//:JB*O*S*S!4+ '.*5//,*K*O*O!4+ '.*/, 8B*U"3qK (3%4'2%4	+OB'O 'EZ 88>>?{m[ijyiz{ a2	s   D .JEJ)NN)NNN)r   r   r   __doc__r#   boolr   r$   r   r%   r&   r   listr   r,   r   r   re   r   ro   r}   r   r   r   rm   r   r   r   r   r   r   r    r"   r"      s,   
 ItNCNC&11 OQ8%8378GK8L L(4. L\UU2:4.U	$Un.`0d5%!$Z5% 5% 	5%
 5%~ 48,0;; "; 	;
 TN; ; TN; 4S#X/0; c4i(); 
$;B 48,0vv "v 4S#X/0	v
 c4i()vx 48,0+/ff "f 4S#X/0	f
 c4i()f #4.fr   r"   )!r   typingr   r   r   r   r   r   rV   rS   r	   litellm._loggingr
   r   litellm.caching.cachingr   "litellm.integrations.custom_loggerr   'litellm.litellm_core_utils.core_helpersr   litellm.types.routerr   litellm.types.utilsr   r   litellm.utilsr   r   opentelemetry.tracer   _Spanr   r"   r   r   r    <module>r      sY     B B   ! B - ; U - Q 91DD+ r	 r	r   