
    ge+                        d Z ddlZddlmZmZmZmZmZ ddlZddl	m
Z
 ddlmZ ddlmZmZ erddlmZ dd	lmZ eZeZneZeZd
ZdZdedee   deeef   dedef
dZdededeeef   dedef
dZ	 	 ddededeeef   dee   dee   defdZdedee   dee   fdZ dedee   dee!   fdZ"dedee   dee   fdZ#dedededefdZ$dedefdZ%deeef   defdZ&y)a  
Router cooldown handlers
- _set_cooldown_deployments: puts a deployment in the cooldown list
- get_cooldown_deployments: returns the list of deployments in the cooldown list
- async_get_cooldown_deployments: ASYNC: returns the list of deployments in the cooldown list

    N)TYPE_CHECKINGAnyListOptionalUnion)verbose_router_logger)router_cooldown_event_callback   )*get_deployment_failures_for_current_minute+get_deployment_successes_for_current_minute)Span)Routerg      ?   litellm_router_instance
deploymentexception_statusoriginal_exceptionreturnc                 ~    | j                   ry|y| j                  ||t        |            sy|| j                  v ryy)a  
    Helper that decides if cooldown logic should be run
    Returns False if cooldown logic should not be run

    Does not run cooldown logic when:
    - router.disable_cooldowns is True
    - deployment is None
    - _is_cooldown_required() returns False
    - deployment is in litellm_router_instance.provider_default_deployment_ids
    - exception_status is not one that should be immediately retried (e.g. 401)
    F)model_idr   exception_strT)disable_cooldowns_is_cooldown_requiredstrprovider_default_deployment_ids)r   r   r   r   s       [/var/www/openai/venv/lib/python3.12/site-packages/litellm/router_utils/cooldown_handlers.py_should_run_cooldown_logicr   %   sU    " 00"88),- 9 
 ,LLL    c                 h   | j                   t        |       du rt        | |      }t        | |      }||z   }d}|dkD  r|||z   z  }t	        j
                  d||||       t        |      }|dk(  ry|d	k(  ry|t        kD  ryt        j                  t        |      
      du ryyt        | ||      S )a_  
    Helper that decides if a deployment should be put in cooldown

    Returns True if the deployment should be put in cooldown
    Returns False if the deployment should not be put in cooldown


    Deployment is put in cooldown when:
    - v2 logic (Current):
    cooldown if:
        - got a 429 error from LLM API
        - if %fails/%(successes + fails) > ALLOWED_FAILURE_RATE_PER_MINUTE
        - got 401 Auth error, 404 NotFounder - checked by litellm._should_retry()



    - v1 logic (Legacy): if allowed fails or allowed fail policy set, coolsdown if num fails in this minute > allowed fails
    r   F)r   deployment_idg        r   zYpercent fails for deployment = %s, percent fails = %s, num successes = %s, num fails = %si  Tr
   )status_code)r   r   r   )allowed_fails_policy_is_allowed_fails_set_on_routerr   r   r   debugcast_exception_status_to_int!DEFAULT_FAILURE_THRESHOLD_PERCENTlitellm_should_retry-should_cooldown_based_on_allowed_fails_policy)	r   r   r   r   num_successes_this_minutenum_fails_this_minutetotal_requests_this_minutepercent_failsexception_status_ints	            r   _should_cooldown_deploymentr0   I   s	   2 	 44<+$;
 
 %P$;:%
! !K$;:!
 &?AV%V"%)1),AAM 	##g%!	
  <<LM3&&!+>> !!89IJ 
 <$;!1
 	
r   time_to_cooldownc                 :   t        | |||      du s|yt        |      }t        j                  d| d       | j                  xs d}||}t        | |||      rB| j                  j                  ||||       t        j                  t        | |||             yy)aL  
    Add a model to the list of models being cooled down for that minute, if it exceeds the allowed fails / minute

    or

    the exception is not one that should be immediately retried (e.g. 401)

    Returns:
    - True if the deployment should be put in cooldown
    - False if the deployment should not be put in cooldown
    FzAttempting to add z to cooldown listr
   )r   r   r   cooldown_time)r   r!   r   r3   T)r   r&   r   r%   r3   r0   cooldown_cacheadd_deployment_to_cooldownasynciocreate_taskr	   )r   r   r   r   r1   r/   r3   s          r   _set_cooldown_deploymentsr8      s    & 	##Z1ACU	
 	 78HI"4ZL@Q RS+99>QM#("-=?Q 	 ..II11'	 	J 	
 	*(?(!1+		
 r   parent_otel_spanc                 D  K   | j                         }| j                  j                  ||       d{   }g }|Ct        |t              r3t        |      dkD  r%t        |d   t              r|D cg c]  }|d   	 }}t        j                  d|        |S 7 ec c}w w)=
    Async implementation of '_get_cooldown_deployments'
    	model_idsr9   Nr   retrieve cooldown models: )	get_model_idsr4   async_get_active_cooldowns
isinstancelistlentupler   r%   r   r9   r=   cooldown_modelscached_value_deployment_idscvs         r   _async_get_cooldown_deploymentsrI      s      (557I%44OO- P 
 	
  #%#- 1$q)517F&Gr!u#&G"<_<M NO&&!	
 'Hs!   1B B=B 1B=B B c                    K   | j                         }| j                  j                  ||       d{   }t        j                  d|        |S 7 w)r;   r<   Nr>   )r?   r4   r@   r   r%   )r   r9   r=   rF   s       r   /_async_get_cooldown_deployments_with_debug_inforK      sd      (557I%44OO2B P 
 	
  "<_<M NO	
s   1AAAc                     | j                         }| j                  j                  ||      }g }|Ct        |t              r3t        |      dkD  r%t        |d   t              r|D cg c]  }|d   	 }}|S c c}w )zB
    Get the list of models being cooled down for this minute
    r<   r   )r?   r4   get_active_cooldownsrA   rB   rC   rD   rE   s         r   _get_cooldown_deploymentsrN      s     (557I-<<QQ.> R O #%#- 1$q)517F&Gr!u#&G&& 'Hs   'A7c                     | j                  |      xs | j                  }| j                  xs t        }| j                  j                  |      xs d}|dz   }||kD  ry| j                  j                  |||       y)z
    Check if fails are within the allowed limit and update the number of fails.

    Returns:
    - True if fails exceed the allowed limit (should cooldown)
    - False if fails are within the allowed limit (should not cooldown)
    )	exception)keyr   r
   T)rQ   valuettlF)get_allowed_fails_from_policyallowed_failsr3   DEFAULT_COOLDOWN_TIME_SECONDSfailed_calls	get_cache	set_cache)r   r   r   rU   r3   current_failsupdated_failss          r   r*   r*     s     	 ==( 	> 	
 	1 #00	  	 --N1N  ,88BBzBRWVWM!A%M}$,,66-] 	7 	
 r   c                 Z    | j                   y| j                   t        j                   k7  ryy)z
    Check if Router.allowed_fails is set or is Non-default Value

    Returns:
    - True if Router.allowed_fails is set or is Non-default Value
    - False if Router.allowed_fails is None or is Default Value
    FT)rU   r(   r    s    r   r$   r$   @  s-     ,,4,,0E0EEr   c                     t        | t              r	 t        |       } | S | S # t        $ r t	        j
                  d|  d       d} Y | S w xY w)Nz'Unable to cast exception status to int z. Defaulting to status=500.i  )rA   r   int	Exceptionr   r%   )r   s    r   r&   r&   Q  sh    "C(	#"#34   	#!''9:J9KKfg  #	#s   ! $A	A	)NN)'__doc__r6   typingr   r   r   r   r   r(   litellm._loggingr   'litellm.router_utils.cooldown_callbacksr	   )router_callbacks.track_deployment_metricsr   r   opentelemetry.tracer   _Spanlitellm.routerr   _RouterLitellmRouterr'   rV   r   r^   boolr   r0   floatr8   rI   rD   rK   rN   r*   r$   r&    r   r   <module>rm      s    < <  2 R
 10MDMD " !" !*!! CHo! 	!
 
!HM*MM CHoM 	M
 
Mh !%(,6*66 CHo6 	6
 uo6 
6r'*'tn' 
#Y'8*tn 
%[$'*'>Ftn'	#Y': *     
	 F*	"	5c? 	s 	r   