
    gׅ              	       B   d Z ddlZddlZddlZddlZddlmZmZmZm	Z	m
Z
mZmZmZmZmZ ddlmZ ddlZddlmZmZ ddlmZ ddlmZ ddlmZ dd	lmZmZmZm Z m!Z!m"Z" erdd
l#m$Z% ddl&m'Z' neZ%eZ' G d de      Z( G d d      Z)	 dde	deeedf      de
e*ef   fdZ+y)a  
This contains LLMCachingHandler 

This exposes two methods:
    - async_get_cache
    - async_set_cache

This file is a wrapper around caching.py

This class is used to handle caching logic specific for LLM API requests (completion / embedding / text_completion / transcription etc)

It utilizes the (RedisCache, s3Cache, RedisSemanticCache, QdrantSemanticCache, InMemoryCache, DiskCache) based on what the user has setup

In each method it will call the appropriate method from caching.py
    N)
TYPE_CHECKINGAnyAsyncGeneratorCallableDict	GeneratorListOptionalTupleUnion)	BaseModel)print_verboseverbose_logger)S3Cache)1_assemble_complete_response_from_streaming_chunks)RerankResponse)	CallTypes	EmbeddingEmbeddingResponseModelResponseTextCompletionResponseTranscriptionResponse)LoggingCustomStreamWrapperc                   H    e Zd ZU dZdZee   ed<   dZee	   ed<   dZ
eed<   y)CachingHandlerResponsea  
    This is the response object for the caching handler. We need to separate embedding cached responses and (completion / text_completion / transcription) cached responses

    For embeddings there can be a cache hit for some of the inputs in the list and a cache miss for others
    Ncached_resultfinal_embedding_cached_responseF embedding_all_elements_cache_hit)__name__
__module____qualname____doc__r   r
   r   __annotations__r   r   r    bool     T/var/www/openai/venv/lib/python3.12/site-packages/litellm/caching/caching_handler.pyr   r   <   s7     $(M8C='CG#X.?%@G %d r(   r   c                   p   e Zd Zdedeeef   dej                  fdZ	 d'dedede	dej                  ded	eeef   d
e
eedf      defdZ	 d'dedede	dej                  ded	eeef   d
e
eedf      defdZde
e   dee
eeef         d	eeef   de	dej                  dedee
e   ef   fdZdededej                  dej                  def
dZde	dedej                  dej                  def
dZded	eeef   d
eedf   de
e   fdZ	 d'deded	eeef   de	ded
eedf   de
e   de
eeeeeeef      fdZdedede	dedef
dZ	 d'deded	eeef   d
e
eedf      fdZ	 d'ded	eeef   d
e
eedf      fdZded	eeef   defdZ dedefd Z!d!efd"Z"d!efd#Z#	 d(de	ded	eeef   ded$ed%efd&Z$y))LLMCachingHandleroriginal_functionrequest_kwargs
start_timec                 J    g | _         g | _        || _        || _        || _        y N)async_streaming_chunkssync_streaming_chunksr-   r,   r.   )selfr,   r-   r.   s       r)   __init__zLLMCachingHandler.__init__K   s.     <>#:<",!2$r(   Nmodellogging_obj	call_typekwargsargs.returnc                   K   ddl m} |xs d}d}	d}
d}|j                  dd      t        j                  |j                  dd      du ru|j                  di       j                  d	d      durPt        j                  ?| j                  |
      r,t        j                  d       | j                  |||       d{   }|jt        |t              sYt        j                  d       d}t        j                  j                         }t        j                  ||j                  dd      |j                  dd      |j                  dd            \  }}}}| j                  ||||d       |j                  }| j!                  ||||||j                  dd      |      }|j                  dd      du r| j#                  |||||       t        j                  j$                  di |}t        |t&              st        ||      rt)        |d      r||j*                  d<   t-        |      S |t.        j0                  j2                  k(  rq|ot        |t              r_t        j                  Ot        t        j                  j                  t4              s'| j7                  |	|||||      \  }	}
t-        |	|
      S t        j                  d|        t-        ||	      S 7 %w)aC  
        Internal method to get from the cache.
        Handles different call types (embeddings, chat/completions, text_completion, transcription)
        and accordingly returns the cached response

        Args:
            model: str:
            original_function: Callable:
            logging_obj: LiteLLMLoggingObj:
            start_time: datetime.datetime:
            call_type: str:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:


        Returns:
            CachingHandlerResponse:
        Raises:
            None
        r   r   r'   NFcachingTcachezno-cacher,   Checking Cache)r7   r8   r9   z
Cache Hit!custom_llm_providerapi_baseapi_keyr5   r@   rA   rB   r6   r5   r8   r   is_asyncr   r7   r8   r6   r5   r@   r9   streamr6   r   r.   end_time	cache_hit_hidden_params	cache_keyr   )r   r   r8   r6   r.   r5   )r   r    zCACHE RESULT: )r   r   )litellm.utilsr   getlitellmr=    _is_call_type_supported_by_cacher   debug_retrieve_from_cache
isinstancelistdatetimenowget_llm_provider'_update_litellm_logging_obj_environmentr!   (_convert_cached_result_to_model_response!_async_log_cache_hit_on_callbacks!_get_preset_cache_key_from_kwargsr   hasattrrK   r   r   
aembeddingvaluer   (_process_async_embedding_cached_response)r3   r5   r,   r6   r.   r7   r8   r9   r   r   r    r   rJ   rI   _rL   s                   r)   _async_get_cachez"LLMCachingHandler._async_get_cacheX   s    < 	6zrGK'16('+ZZ	4(0W]]5Nzz)U+t3JJw#''
E:$F}}(T-R-R"3 .S . $$%56&*&?&?'! '@ ' ! !,Zt5T"((6 $I'00446H%,%=%=#,2JJ7Ld,S!'J!= &

9d ;	&NE1a @@$/#%&3!% A  !2 : :I$($Q$Q&3"+%$/#,2JJ7Ld,S! %R %M zz(E2e;>>(3*7'1%-&/ ?  !( O O ! !I #=)<%m5HI!-1ABDM44[A1NN!5!5!;!;;%1"=$71&++W EE8W&3%$/#-# F 78 28W9Y  	~m_=>%',K
 	
]!s   B?K)K&H%K)c                    ddl m} |xs d}|j                         }	|	j                  t	        | j
                  |             d }
t        j                  | j                  |      rt        d       t        j                  j                  di |	}
|
Wd|
v rnQ|j                  }| j                  |
|||||j                  dd       |      }
d	}t        j                  j                         }t        j                   |xs d
|j                  dd       |j                  dd       |j                  dd             \  }}}}| j#                  ||||
d       t%        j&                  |j(                  |
|||f      j+                          t        j                  j,                  di |}t/        |
t0              st/        |
|      rt3        |
d      r||
j4                  d<   t7        |
      S t7        |
      S )Nr   r   r'   r>   r?   detailr@   rF   T rA   rB   rC   FrD   targetr9   rK   rL   rM   )rN   r   copyupdateconvert_args_to_kwargsr,   rP   r=   rQ   r   	get_cacher!   rZ   rO   rV   rW   rX   rY   	threadingThreadsuccess_handlerstartr\   rT   r   r]   rK   r   )r3   r5   r,   r6   r.   r7   r8   r9   r   
new_kwargsr   rJ   rI   r@   dynamic_api_keyrA   rL   s                    r)   _sync_get_cachez!LLMCachingHandler._sync_get_cache   s    	6zr[[]
"&&	
 (,==$)N)N/ *O *
 *+#MM33AjAM(}, 1 : :I$($Q$Q&3"+%$/#,2JJ7Ld,S! %R %M !%I'00446H  00#kr,2JJ7Ld,S!'J!= &

9d ;	+'  @@$/#%&3!& A  $$*::+Z9M eg ' O O ! !I #=)<%m5HI!-1ABDM44[A1NN%MBBr(   r   r   c           	         d}g }g }	t        |      D ]2  \  }
}||j                  |d   |
           |	j                  |
|f       4 |d   }||d<   t        |	      dkD  r|t        dt        |	              t	        |j                  d      dgt        |      z        }d|j                  d	<   |	D ](  }|\  }
}|t        |d
   |
d
      |j                  |
<   * t        |      dk(  rd}d}t        j                  j                         }t        j                  ||j                  dd      |j                  dd      |j                  dd            \  }}}}| j                  ||||dd       | j                  |||||       ||fS ||fS )a  
        Returns the final embedding cached response and a boolean indicating if all elements in the list have a cache hit

        For embedding responses, there can be a cache hit for some of the inputs in the list and a cache miss for others
        This function processes the cached embedding responses and returns the final embedding cached response and a boolean indicating if all elements in the list have a cache hit

        Args:
            final_embedding_cached_response: Optional[EmbeddingResponse]:
            cached_result: List[Optional[Dict[str, Any]]]:
            kwargs: Dict[str, Any]:
            logging_obj: LiteLLMLoggingObj:
            start_time: datetime.datetime:
            model: str:

        Returns:
            Tuple[Optional[EmbeddingResponse], bool]:
            Returns the final embedding cached response and a boolean indicating if all elements in the list have a cache hit


        FNinputr   zEMBEDDING CACHE HIT! - r5   )r5   dataTrJ   	embedding)rv   indexobjectr@   rA   rB   rC   )r6   r5   r8   r   rE   is_embeddingrH   )	enumerateappendlenr   r   rO   rK   r   ru   rV   rW   rP   rX   rY   r[   )r3   r   r   r8   r6   r.   r5   r    remaining_listnon_null_listidxcroriginal_kwargs_inputvalrJ   rI   r@   rq   rA   s                      r)   r`   z:LLMCachingHandler._process_async_embedding_cached_response*  s   : 27( /GCz%%fWoc&:;$$c2Y/	 0
 !'w(w}!3C4F3GHI.?jj)Vc"788/+ KO+::;G$R>@I"$[/!*A388= % ~!#I/3,((,,.H (($*JJ/Dd$KJ5

9d3	# 88'=! 9  22'=%!# 3  34TTT.0PPPr(   _caching_handler_responseembedding_responserI   c                    |j                   |S d}g }|j                   j                  D ]E  }|0|j                  $|j                  |j                  |          |dz  }5|j                  |       G ||j                   _        d|j                   j                  d<   ||z
  j	                         dz  |j                   _        |j                   S )a  
        Combines the cached embedding response with the API EmbeddingResponse

        For caching there can be a cache hit for some of the inputs in the list and a cache miss for others
        This function combines the cached embedding response with the API EmbeddingResponse

        Args:
            caching_handler_response: CachingHandlerResponse:
            embedding_response: EmbeddingResponse:

        Returns:
            EmbeddingResponse:
        r      TrJ   i  )r   ru   r{   rK   total_seconds_response_ms)r3   r   r   r.   rI   r   final_data_listitems           r)   2_combine_cached_embedding_response_with_api_resultzDLLMCachingHandler._combine_cached_embedding_response_with_api_result  s    ( %DDL%%-MMRRD| 2 7 7 C&&'9'>'>s'CDq&&t, S JY!AAF  	"AAPP	
 z!
-/DR!!AAN )HHHr(   rJ   c                     t        j                  |j                  ||||             t        j                  |j
                  ||||f      j                          y)a  
        Helper function to log the success of a cached result on callbacks

        Args:
            logging_obj (LiteLLMLoggingObj): The logging object.
            cached_result: The cached result.
            start_time (datetime): The start time of the operation.
            end_time (datetime): The end time of the operation.
            cache_hit (bool): Whether it was a cache hit.
        rf   N)asynciocreate_taskasync_success_handlerrl   rm   rn   ro   )r3   r6   r   r.   rI   rJ   s         r)   r[   z3LLMCachingHandler._async_log_cache_hit_on_callbacks  sW    $ 	--z8Y	

 	..XyA	
 %'r(   c                 2  K   t         j                  y|j                         }|j                  t	        | j
                  |             d}|t        j                  j                  k(  rt        |d   t              rg }t        |d         D ]Y  \  }}t        j                  j                  di i |d|i}	|j                  t         j                  j                  |	             [ t        j                   |  d{   }|$t        |t              rt#        d |D              rd}|S t         j                  j%                         du r)t        j                  j                  di | d{   }|S t        j                  j&                  di |}|S 7 7 )w)a  
        Internal method to
        - get cache key
        - check what type of cache is used - Redis, RedisSemantic, Qdrant, S3
        - async get cache value
        - return the cached value

        Args:
            call_type: str:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            Optional[Any]:
        Raises:
            None
        Nrt   )rL   c              3   $   K   | ]  }|d u  
 y wr0   r'   ).0results     r)   	<genexpr>z9LLMCachingHandler._retrieve_from_cache.<locals>.<genexpr>  s     BM&v~Ms   Tr'   )rP   r=   rh   ri   rj   r,   r   r^   r_   rT   rU   rz   get_cache_keyr{   async_get_cacher   gatherall_supports_asyncrk   )
r3   r7   r8   r9   rp   r   tasksr   ipreset_cache_keys
             r)   rS   z&LLMCachingHandler._retrieve_from_cache  sx    ( == [[]
"&&	
 (,	,,222zw8
 E#Jw$78Q#*==#>#> $00Wa0$  W]]::EU:VW	 9
 #*..%"88M(Zt-LBMBB$(M 	 }},,.$6&-mm&C&C&Qj&Q Q  !( 7 7 E* E 9 !Rs%   C:F<F=A.F+F,(FFr@   c                    ddl m} |t        j                  j                  k(  s|t        j
                  j                  k(  rLt        |t              r<|j                  dd      du r| j                  ||||      }n ||t                     }|t        j                  j                  k(  s|t        j                  j                  k(  rHt        |t              r8|j                  dd      du r| j                  ||||      }n%t        di |}n|t        j                  j                  k(  s|t        j                  j                  k(  r$t        |t              r ||t!               d	      }n|t        j"                  j                  k(  s|t        j$                  j                  k(  rt        |t              r ||d
d	      }nd|t        j&                  j                  k(  s|t        j(                  j                  k(  r*t        |t              rd|dd}	 ||t+               d|	      }t-        |d      r5|j.                  )t        |j.                  t              rd|j.                  d<   |S )aH  
        Internal method to process the cached result

        Checks the call type and converts the cached result to the appropriate model response object
        example if call type is text_completion -> returns TextCompletionResponse object

        Args:
            cached_result: Any:
            call_type: str:
            kwargs: Dict[str, Any]:
            logging_obj: LiteLLMLoggingObj:
            model: str:
            custom_llm_provider: Optional[str] = None:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            Optional[Any]:
        r   ) convert_to_model_response_objectrG   FT)r   r7   r6   r5   )response_objectmodel_response_objectrv   )r   r   response_typeNrerankz	whisper-1)r5   r@   rJ   audio_transcription)r   r   r   hidden_paramsrK   rJ   r'   )rN   r   r   acompletionr_   
completionrT   dictrO   _convert_cached_stream_responser   atext_completiontext_completionr   r^   rv   r   arerankr   atranscriptiontranscriptionr   r]   rK   )
r3   r   r7   r8   r6   r5   r9   r@   r   r   s
             r)   rZ   z:LLMCachingHandler._convert_cached_result_to_model_response  s+   J 	C ..444I00666-zz(E*d2 $ D D"/' +	 !E ! !A$1*7/!
 33999I55;;;-zz(E*d2 $ D D"/' +	 !E ! !7 G G--333I//555-< -&7&9)M **000IAQAQAWAW4W-< -&*&M 11777I33999-$':!M
 = -&;&=3+	M M#34,,8=77>8<M((5r(   c                     ddl m}m}m} |t        j
                  j                  k(  s|t        j                  j                  k(  r
 ||      }n	 ||      } |||d|      S )Nr   )r   convert_to_streaming_response#convert_to_streaming_response_async)r   cached_response)completion_streamr5   r@   r6   )rN   r   r   r   r   r   r_   r   )	r3   r   r7   r6   r5   r   r   r   _stream_cached_results	            r)   r   z1LLMCachingHandler._convert_cached_stream_responsel  sp    	
 	
 ..444I66<<<$G -%! %B -%! #3 1#	
 	
r(   r   c                 0  K   t         j                  y|j                         }|j                  t	        ||             | j                  ||      rt        |t         j                        s;t        |t         j                        s!t        |t              st        |t              r9t        |t              rt        |d   t              rlt         j                  \t        t         j                  j                  t              s4t        j                  t        j                  j                  |fi |       yt        t         j                  j                  t              r@t!        j"                  t         j                  j$                  |f|      j'                          yt        j                  t        j                  j(                  |j+                         fi |       yt        j                  t        j                  j(                  |fi |       yyw)ah  
        Internal method to check the type of the result & cache used and adds the result to the cache accordingly

        Args:
            result: Any:
            original_function: Callable:
            kwargs: Dict[str, Any]:
            args: Optional[Tuple[Any, ...]] = None:

        Returns:
            None
        Raises:
            None
        Nr,   r8   rt   )rg   r9   r8   )rP   r=   rh   ri   rj   _should_store_result_in_cacherT   r   r   r   r   rU   r   r   r   async_add_cache_pipelinerl   rm   	add_cachero   async_add_cachemodel_dump_json)r3   r   r,   r8   r9   rp   s         r)   async_set_cachez!LLMCachingHandler.async_set_cache  s    * == [[]
"!	
 --/
 . 
 67#8#89fg&?&?@f&;<fn5 v'89":g#6=1&++W ''>>vTT   3 3W=$$&}}66$Y) eg''55"2248B ##GMM$A$A&$WJ$WXC
s   HHc                 
   |j                         }|j                  t        | j                  |             t        j
                  y| j                  | j                  |      r t	        j
                  j                  |fi | y)zE
        Sync internal method to add the result to the cache
        Nr   )rh   ri   rj   r,   rP   r=   r   r   )r3   r   r8   r9   rp   s        r)   sync_set_cachez LLMCachingHandler.sync_set_cache  sz     [[]
"&&	
 == --"44Z . 
 MM##F9j9r(   c                    t         j                  duxrs t         j                  j                  duxrU t        |j                        t         j                  j                  v xr$ |j                  di       j                  dd      duS )z
        Helper function to determine if the result should be stored in the cache.

        Returns:
            bool: True if the result should be stored in the cache, False otherwise.
        Nr=   zno-storeFT)rP   r=   supported_call_typesstrr!   rO   )r3   r,   r8   s      r)   r   z/LLMCachingHandler._should_store_result_in_cache  sy     ]]$& M22$>M&//0GMM4V4VVM GR(,,Z?tK		
r(   c                     t         j                  Jt         j                  j                  0t        |j                        t         j                  j                  v ryy)aO  
        Helper function to determine if the call type is supported by the cache.

        call types are acompletion, aembedding, atext_completion, atranscription, arerank

        Defined on `litellm.types.utils.CallTypes`

        Returns:
            bool: True if the call type is supported by the cache, False otherwise.
        TF)rP   r=   r   r   r!   )r3   r,   s     r)   rQ   z2LLMCachingHandler._is_call_type_supported_by_cache  sB     MM%22>%../7==3U3UUr(   processed_chunkc                   K   t        || j                  t        j                  j                         | j                  | j
                  d      }|1| j                  || j                  | j                         d{    yy7 w)z
        Internal method to add the streaming response to the cache


        - If 'streaming_chunk' has a 'finish_reason' then assemble a litellm.ModelResponse object
        - Else append the chunk to self.async_streaming_chunks

        Tr   r.   rI   r-   streaming_chunksrE   N)r   r,   r8   )r   r.   rV   rW   r-   r1   r   r,   r3   r   complete_streaming_responses      r)    _add_streaming_response_to_cachez2LLMCachingHandler._add_streaming_response_to_cache  s      >"&&**,..!88
 	$ '2&&2"&"8"8** '    3s   A9B;B<Bc                     t        || j                  t        j                  j                         | j                  | j
                  d      }|| j                  || j                         yy)zQ
        Sync internal method to add the streaming response to the cache
        Fr   N)r   r8   )r   r.   rV   rW   r-   r2   r   r   s      r)   %_sync_add_streaming_response_to_cachez7LLMCachingHandler._sync_add_streaming_response_to_cache+  sn     >"&&**,..!77
 	$ '22**    3r(   rE   ry   c                 6   |j                  dd      ||j                  dd      |j                  di       |j                  di       |j                  dd      |j                  di       d	}t        j                  #t        j                  j                  di ||d
<   nd|d
<   |j	                  ||j                  dd      i ||s|j                  dd      n|j                  dd      |j                  dd      t        |      d|j                  dd      	       y)a-  
        Helper function to update the LiteLLMLoggingObj environment variables.

        Args:
            logging_obj (LiteLLMLoggingObj): The logging object to update.
            model (str): The model being used.
            kwargs (Dict[str, Any]): The keyword arguments from the original function call.
            cached_result (Any): The cached result to log.
            is_async (bool): Whether the call is asynchronous or not.
            is_embedding (bool): Whether the call is for embeddings or not.

        Returns:
            None
        	logger_fnNrA   re   metadata
model_infoproxy_server_requeststream_response)r   r   rA   r   r   r   r   r   usermessagesrt   rB   rG   F)	r5   r   optional_paramslitellm_paramsrt   rB   original_responseadditional_argsrG   r'   )rO   rP   r=   r\   update_environment_variablesr   )r3   r6   r5   r8   r   rE   ry   r   s           r)   rY   z9LLMCachingHandler._update_litellm_logging_obj_environmentA  s   0  K6#

:r2

:r2 **\26$*JJ/Et$L%zz*;R@
 ==$??I&I -. 26N-.00FD)) $ 

:r*ZZ,JJy$/!-0 ::h. 	1 	
r(   r0   )F)%r!   r"   r#   r   r   r   r   rV   r4   LiteLLMLoggingObjr
   r   r   rb   rr   r   r	   r&   r`   r   r[   rS   r   r   r   r   r   r   rZ   r   r   r   r   rQ   r   r   rY   r'   r(   r)   r+   r+   J   s   # S#X %%	* +/@
@
 $@
 '	@

 %%@
 @
 S#X@
 uS#X'@
 
 @
T +/NCNC $NC '	NC
 %%NC NC S#XNC uS#X'NC 
 NC`XQ)12C)DXQ HT#s(^45XQ S#X	XQ
 'XQ %%XQ XQ 
x)*D0	1XQt'I#9'I .'I %%	'I
 ##'I 
'IR&  %%	
 ## 833&*38n3<A#s(O3	#3z .2ll l S#X	l
 'l l CHol &c]l 
"!!	
	
l\

 
 '	

 
 

J +/AYAY $AY S#X	AY
 uS#X'AYN +/	 S#X uS#X'	4
!)
37S>
	
 # 
,m 8] : #6
&6
 6
 S#X	6

 6
 6
 6
r(   r+   r,   r9   .r:   c                     t        j                  |       }t        |j                  j	                               }i }|r,t        |      D ]  \  }}|t        |      k  s||   }|||<     |S r0   )inspect	signaturerU   
parameterskeysrz   r|   )r,   r9   r   param_namesargs_to_kwargsrw   arg
param_names           r)   rj   rj   z  su    
 !!"34I y++0023K N#D/JE3s;''(/
-0z* *
 r(   r0   ),r$   r   rV   r   rl   typingr   r   r   r   r   r   r	   r
   r   r   pydanticr   rP   litellm._loggingr   r   litellm.caching.cachingr   (litellm.litellm_core_utils.logging_utilsr   litellm.types.rerankr   litellm.types.utilsr   r   r   r   r   r   *litellm.litellm_core_utils.litellm_loggingr   r   rN   r   r   r+   r   rj   r'   r(   r)   <module>r      s             : + 0  W1Y m
 m
d '+
5c?
# 
#s(^r(   