
    gY:                     b    d Z ddlZddlZddlZddlmZ ddlZddlmZ ddl	m
Z
  G d de
      Zy)	z
Qdrant Semantic Cache implementation

Has 4 methods:
    - set_cache
    - get_cache
    - async_set_cache
    - async_get_cache
    N)Any)print_verbose   )	BaseCachec                   R    e Zd Z	 	 	 	 	 	 	 ddZdefdZd Zd Zd Zd Z	d	 Z
d
 Zy)QdrantSemanticCacheNc                    dd l }ddlm}	m}
m} ddlm} |t        d      || _        t        d| j                          |t        d      || _
        || _        i }|r)t        |t              r|j                  d      r ||      }|r)t        |t              r|j                  d      r ||      }|xs$ |j                  d      xs |j                  d	      }|xs |j                  d
      }ddi}|r||d<   |t!        d      || _        || _        t        d| j"                          || _         |	       | _         |
|j*                        | _        |t        d       | j(                  j/                  | j"                   d| j                   d| j&                        }|j0                  dk7  rt!        d|j2                         |j5                         d   d   rn| j(                  j/                  | j"                   d| j                   | j&                        }|j5                         | _        t        d| j6                          y ||dk(  rdddii}n&|dk(  r	ddddd i}n|d!k(  rd!d"dd#i}nt        d$      | j(                  j9                  | j"                   d| j                   d%d&d'|d(| j&                  )      }|j5                         d   rn| j(                  j/                  | j"                   d| j                   | j&                        }|j5                         | _        t        d*| j6                          y t        d+      ),Nr   )_get_httpx_clientget_async_httpx_clienthttpxSpecialProvider)get_secret_strz-collection_name must be provided, passed Nonez0qdrant semantic-cache initializing COLLECTION - z2similarity_threshold must be provided, passed Nonezos.environ/
QDRANT_URLQDRANT_API_BASEQDRANT_API_KEYzContent-Typezapplication/jsonzapi-keyzQdrant url must be providedz'qdrant semantic-cache qdrant_api_base: )llm_providerzNQuantization config is not provided. Default binary quantization will be used./collections/z/exists)urlheaders   z1Error from qdrant checking if /collections exist resultexistsz.Collection already exists.
Collection details:binary
always_ramFscalarint8gGz?)typequantiler   productx16)compressionr   zBQuantization config must be one of 'scalar', 'binary' or 'product'i   Cosine)sizedistance)vectorsquantization_config)r   jsonr   z+New collection created.
Collection details:z#Error while creating new collection)os&litellm.llms.custom_httpx.http_handlerr
   r   r   litellm.secret_managers.mainr   	Exceptioncollection_namer   similarity_thresholdembedding_model
isinstancestr
startswithgetenv
ValueErrorqdrant_api_baseqdrant_api_keyr   sync_clientCachingasync_clientgetstatus_codetextr&   collection_infoput)selfr3   r4   r+   r,   r%   r-   	host_typer'   r
   r   r   r   r   collection_existscollection_detailsquantization_paramsnew_collection_statuss                     Z/var/www/openai/venv/lib/python3.12/site-packages/litellm/caching/qdrant_semantic_cache.py__init__zQdrantSemanticCache.__init__   s    		
 	

 	@"KLL.>t?S?S>TU	
  'PQQ$8!. /3/O4N4N5 #1"A.#.>3L3L4 "0!? Vryy6V"))DU:V 	 (F2995E+F!#56!/GI":;;.,?@T@T?UVW,.2-55
 &` !,,00''(d6J6J5K7SLL 1 
 ((C/CDUDZDZC[\  !!#H-h7!%!1!1!5!5++,M$:N:N9OP "6 " $6#:#:#<D A$BVBVAWX #*.AX.M$e'#
 %0v4uU'# %	1uEJ'#  X  %)$4$4$8$8++,M$:N:N9OP(,(C+>  %9 %! %))+H5%)%5%5%9%9//0d>R>R=ST LL &: &" (:'>'>'@$B4CWCWBXY   EFF    cached_responsec                     ||S 	 t        j                  |      }|S # t        $ r t        j                  |      }Y |S w xY wN)r&   loadsr*   astliteral_eval)r=   rF   s     rC   _get_cache_logicz$QdrantSemanticCache._get_cache_logic   sV    """	@"jjO
   	@!..?O	@s    ??c                    t        d|        dd l}|d   }d}|D ]
  }||d   z  } t        j                  | j                  |ddd      }|d	   d   d
   }	t        |      }t        |t
              sJ dt        |j                               |	||ddgi}
| j                  j                  | j                   d| j                   d| j                  |
       y )Nz)qdrant semantic-cache set_cache, kwargs: r   messages contentTzno-storezno-cachemodelinputcachedata	embeddingpointsr:   responseidvectorpayloadr   /pointsr   r   r&   )r   uuidlitellmrW   r-   r/   r.   uuid4r5   r<   r3   r+   r   )r=   keyvaluekwargsra   rN   promptmessageembedding_responserW   rV   s              rC   	set_cachezQdrantSemanticCache.set_cache   s   A&JK *%Ggi((F   %..&&#6
 'v.q1+>	E
%%%% djjl+' &$) 	
 	''(d6J6J5K7SLL 	 	

 	rE   c           
         t        d|        |d   }d}|D ]
  }||d   z  } t        j                  | j                  |ddd      }|d   d	   d
   }|dddddiddd}| j                  j                  | j                   d| j                   d| j                  |      }	|	j                         d   }
|
y t        |
t              rt        |
      d	k(  ry |
d	   d   }|
d	   d   d   }t        d| j                   d| d| d|        || j                  k\  r1|
d	   d   d   }t        d| d| d|        | j                  |       S y )!Nz.sync qdrant semantic-cache get_cache, kwargs: rN   rO   rP   TrQ   rR   rV   r   rW   quantizationF      @ignorerescoreoversamplingr   r]   paramslimitwith_payloadr   /points/searchr`   r   scorer^   r:   &semantic cache: similarity threshold: , similarity: 
, prompt: , closest_cached_prompt: rZ   got a cache hit, similarity: , Current prompt: , cached_prompt: rF   )r   rb   rW   r-   r5   postr3   r+   r   r&   r.   listlenr,   rL   )r=   rd   rf   rN   rg   rh   ri   rW   rV   search_responseresults
similaritycached_promptcached_values                 rC   	get_cachezQdrantSemanticCache.get_cache   s   FvhOP *%Ggi((F   %..&&#6
 'v.q1+>	  ##$'!  
 **//''(d6J6J5K>ZLL 0 

 "&&(2?gt$7|q QZ(

9-f5 	4T5N5N4O~^h]iistzs{  |U  Vc  Ud  e	
 222"1:i0<L/
|;MfXUfgtfuv (((FF rE   c                 6  K   dd l }ddlm}m} t	        d|        |d   }d}|D ]
  }	||	d   z  } ||D 
cg c]  }
|
d   	 c}
ng }|| j
                  |v rq|j                  di       j                  d	d      }|j                  | j
                  |d
d
d|d
|j                  di       j                  dd       d       d {   }n-t        j                  | j
                  |d
d
d       d {   }|d   d   d   }t        |      }t        |t              sJ dt        |j                               |||ddgi}| j                  j                  | j                   d| j                   d| j                   |       d {    y c c}
w 7 7 7 w)Nr   llm_model_list
llm_routerz/async qdrant semantic-cache set_cache, kwargs: rN   rO   rP   
model_namemetadatauser_api_keyTrQ   trace_idr   zsemantic-cache-embeddingr   rS   rT   rU   r   rR   rV   rW   rX   rY   r[   r   r_   r`   )ra   litellm.proxy.proxy_serverr   r   r   r-   r8   
aembeddingrb   r/   r.   rc   r7   r<   r3   r+   r   )r=   rd   re   rf   ra   r   r   rN   rg   rh   mrouter_model_namesr   ri   rW   rV   s                   rC   async_set_cachez#QdrantSemanticCache.async_set_cache  s    IGxPQ *%Ggi((F  
 ) '55nQ|_n5 	
 !d&:&:>P&P!::j"599."ML'1'<'<**#'T:$004 &

:r : > >z4 P	 (= 	( 	" (/'9'9**#'T:( " 'v.q1+>	E
%%%% djjl+' &$) 	
 ##''(d6J6J5K7SLL $ 
 	
 	

 	a 6	""2	
sB   7FFB FF-F3F4BFF	FFFc                   K   t        d|        ddlm}m} |d   }d}|D ]
  }||d   z  } ||D cg c]  }|d   	 c}ng }	|| j                  |	v rq|j                  di       j                  d	d      }
|j                  | j                  |d
d
d|
d
|j                  di       j                  dd       d       d {   }n-t        j                  | j                  |d
d
d       d {   }|d   d   d   }|ddd
ddidd
d}| j                  j                  | j                   d| j                   d| j                  |       d {   }|j                         d   }|d|j                  di       d<   y t        |t               r$t#        |      dk(  rd|j                  di       d<   y |d   d   }|d   d   d    }t        d!| j$                   d"| d#| d$|        ||j                  di       d<   || j$                  k\  r1|d   d   d%   }t        d&| d'| d(|        | j'                  |)      S y c c}w 7 7 X7 w)*Nz/async qdrant semantic-cache get_cache, kwargs: r   r   rN   rO   rP   r   r   r   TrQ   r   r   r   rR   rV   rW   rl   Frm   rn   r   rr   r   rv   r`   r   g        zsemantic-similarityrw   r^   r:   rx   ry   rz   r{   rZ   r|   r}   r~   r   )r   r   r   r   r-   r8   r   rb   r7   r   r3   r+   r   r&   
setdefaultr.   r   r   r,   rL   )r=   rd   rf   r   r   rN   rg   rh   r   r   r   ri   rW   rV   r   r   r   r   r   s                      rC   async_get_cachez#QdrantSemanticCache.async_get_cacheM  s    GxPQI *%Ggi((F  
 ) '55nQ|_n5 	
 !d&:&:>P&P!::j"599."ML'1'<'<**#'T:$004 &

:r : > >z4 P	 (= 	( 	" (/'9'9**#'T:( " 'v.q1+>	  ##$'!  
 !% 1 1 6 6''(d6J6J5K>ZLL !7 !
 
 "&&(2?GJFj"-.CDgt$7|q KN!!*b12GHQZ(

9-f5 	4T5N5N4O~^h]iistzs{  |U  Vc  Ud  e	

 DN*b)*?@222"1:i0<L/
|;MfXUfgtfuv (((FF Y 6	"",
sC   3IH?B II-I/I0A!II
C3II
Ic                 "   K   | j                   S wrH   )r;   )r=   s    rC   _collection_infoz$QdrantSemanticCache._collection_info  s     ###s   c                    K   g }|D ]+  }|j                   | j                  |d   |d   fi |       - t        j                  |  d {    y 7 w)Nr   r   )appendr   asynciogather)r=   
cache_listrf   tasksvals        rC   async_set_cache_pipelinez,QdrantSemanticCache.async_set_cache_pipeline  sP     CLL---c!fc!fGGH nne$$$s   AA
AA)NNNNNztext-embedding-ada-002N)__name__
__module____qualname__rD   r   rL   rj   r   r   r   r   r    rE   rC   r   r      sP     ! 0GB	 	(T>@>@Xt$%rE   r   )__doc__rJ   r   r&   typingr   rb   litellm._loggingr   
base_cacher   r   r   rE   rC   <module>r      s/         * !X%) X%rE   