
    g'                         d Z ddlZddlmZmZmZmZmZmZ ddl	m
Z
mZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ dd	lmZmZmZmZmZmZ d
dlmZ  G d de      Z G d de      Z  G d de       Z! G d de      Z"y)z\
Translates from OpenAI's `/v1/chat/completions` endpoint to Triton's `/generate` endpoint.
    N)AnyDictListLiteralOptionalUnion)HeadersResponse)prompt_factory)BaseModelResponseIterator)
BaseConfigBaseLLMExceptionLiteLLMLoggingObj)AllMessageValues)ChatCompletionToolCallChunkChatCompletionUsageBlockChoicesGenericStreamingChunkMessageModelResponse   TritonErrorc                   &   e Zd ZdZdededeeef   de	fdZ
	 	 ddeded	ee   d
edee   dee   defdZdedefdZded
edededef
dZ	 	 ddededededed	ee   d
edededee   dee   defdZded	ee   d
edededefdZdeded   fdZy)TritonConfigzx
    Base class for Triton configurations.

    Handles routing between /infer and /generate triton completion llms
    error_messagestatus_codeheadersreturnc                     t        |||      S )N)r   messager   r   )selfr   r   r   s       b/var/www/openai/venv/lib/python3.12/site-packages/litellm/llms/triton/completion/transformation.pyget_error_classzTritonConfig.get_error_class%   s     #]G
 	
    Nmodelmessagesoptional_paramsapi_keyapi_basec                 
    ddiS )NzContent-Typezapplication/json )r"   r   r&   r'   r(   r)   r*   s          r#   validate_environmentz!TritonConfig.validate_environment,   s      233r%   c                 
    ddgS N
max_tokensmax_completion_tokensr,   )r"   r&   s     r#   get_supported_openai_paramsz(TritonConfig.get_supported_openai_params7   s    566r%   non_default_paramsdrop_paramsc                 V    |j                         D ]  \  }}|dk(  s|dk(  s|||<    |S r/   )items)r"   r3   r(   r&   r4   paramvalues          r#   map_openai_paramszTritonConfig.map_openai_params:   s=     /446LE5$1H(H).& 7 r%   raw_responsemodel_responselogging_objrequest_datalitellm_paramsencoding	json_modec                     |j                  dd      }| j                  |      }|dk(  r$t               j                  |||||||||	|
|      S |dk(  r$t	               j                  |||||||||	|
|      S |S )Nr*    generate)r&   r:   r;   r<   r=   r'   r(   r>   r?   r)   r@   infer)get_get_triton_llm_typeTritonGenerateConfigtransform_responseTritonInferConfig)r"   r&   r:   r;   r<   r=   r'   r(   r>   r?   r)   r@   r*   llm_types                 r#   rH   zTritonConfig.transform_responseF   s     "%%j"5,,X6z!')<<)-')! /-!# =    $&99)-')! /-!# :   r%   c                     |j                  dd      }| j                  |      }|dk(  rt               j                  |||||      S |dk(  rt	               j                  |||||      S i S )Nr*   rB   rC   )r&   r'   r(   r>   r   rD   )rE   rF   rG   transform_requestrI   )r"   r&   r'   r(   r>   r   r*   rJ   s           r#   rL   zTritonConfig.transform_requestt   s     "%%j"5,,X6z!');;! /- <    $&88! /- 9   	r%   )rC   rD   c                 f    |j                  d      ry|j                  d      ryt        d|       )Nz	/generaterC   z/inferrD   zInvalid Triton API base: )endswith
ValueError)r"   r*   s     r#   rF   z!TritonConfig._get_triton_llm_type   s7    [)x(8
CDDr%   NN)__name__
__module____qualname____doc__strintr   r   r	   r   r$   r   r   r   r-   r2   boolr9   r
   r   r   r   rH   dictrL   r   rF   r,   r%   r#   r   r      s   
 
/2
=B4==Q
	
 "&"&	4	4 	4 '(		4
 	4 #	4 3-	4 
	47 7 7
 
 
 	

 
 

. "&$(,, , &	,
 ', , '(, , , , #, D>, 
,\ '( 	
   
8ES EW=P5Q Er%   r   c                       e Zd ZdZdedee   dedededefdZ	 	 dded
e	de
dededee   dedededee   dee   de
fdZy	)rG   zP
    Transformations for triton /generate endpoint (This is a trtllm model)
    r&   r'   r(   r>   r   r   c                     |j                         }|j                  dd      }t        ||      t        |j	                  dd            dgdgdt        |      d}|d	   j                  |       |S )
NstreamF)r&   r'   r0   i  rB   )r0   	bad_words
stop_words)
text_input
parametersr[   r_   )copypopr   rV   rE   rW   update)	r"   r&   r'   r(   r>   r   inference_paramsr[   data_for_tritons	            r#   rL   z&TritonGenerateConfig.transform_request   s     +//1!%%h6(uxH!/"5"5lD"IJ T!d
 6l+
 	%,,-=>r%   Nr:   r;   r<   r=   r?   r)   r@   c                     	 |j                         }t        dt        |d               g|_        |S # t        $ r" t        |j                  |j                        w xY w)Nr!   r   r   text_outputcontentindexr!   )json	Exceptionr   textr   r   r   choices)r"   r&   r:   r;   r<   r=   r'   r(   r>   r?   r)   r@   raw_response_jsons                r#   rH   z'TritonGenerateConfig.transform_response   sn    	 , 1 1 3 !W5F}5U%VW"
   	$))|7O7O 	s	   4 +ArP   rQ   rR   rS   rT   rU   r   r   rX   rL   r
   r   r   r   r   r   rW   rH   r,   r%   r#   rG   rG      s     '( 	
   
B "&$(  &	
 '  '(    # D> 
r%   rG   c                       e Zd ZdZdedee   dedededefdZ	 	 dded
e	de
dededee   dedededee   dee   de
fdZy	)rI   zj
    Transformations for triton /infer endpoint (his is an infer model with a custom model on triton)
    r&   r'   r(   r>   r   r   c                 T   |d   j                  dd      }dddgd|gdgi}|j                         D ]T  \  }}	|d	k(  r|d
k(  rt        |	t              rdnd}
t        |	t              rdn|
}
|d   j                  |dg|
|	gd       V d|vr|d   j                  ddgddgd       |S )Nr   ri   rB   inputsr^      BYTES)nameshapedatatypedatar[   max_retriesINT32FP32r0      )rE   r6   
isinstancerV   floatappend)r"   r&   r'   r(   r>   r   r^   rd   kvry   s              r#   rL   z#TritonInferConfig.transform_request   s     a[__Y3
(S ''L		
 $))+DAqMQ-%7&0C&87g%/5%96x)00!(QCP	 , .H%,,(S 'D	 r%   Nr:   r;   r<   r=   r?   r)   r@   c                     	 |j                         }|d   d   d   }d }t        |t              rdj                  |      }n|}t        dt        |            g|_
        |S # t        $ r" t        |j                  |j                        w xY w)Nrf   outputsr   rz   rB   rh   rj   )rl   rm   r   rn   r   r   listjoinr   r   ro   )r"   r&   r:   r;   r<   r=   r'   r(   r>   r?   r)   r@   rp   _triton_response_datatriton_response_datas                  r#   rH   z$TritonInferConfig.transform_response   s    	 , 1 1 3 !2) <Q ? G.2+T2#%77+@#A #8  (<="
 '  	$))|7O7O 	s   A" "+BrP   rq   r,   r%   r#   rI   rI      s    && '(& 	&
 & & 
&f "&$(## # &	#
 '# # '(# # # # ## D># 
#r%   rI   c                       e Zd ZdedefdZy)TritonResponseIteratorchunkr   c           	      ,   	 d}d }d}d}d }d }t        |j                  dd            }|j                  dd      }|j                  dd      }|j                  dd      }t        |||||||      S # t        j                  $ r t        d	|       w xY w)
NrB   Frk   r   rg   stop_reasonis_finished)rn   tool_user   finish_reasonusagerk   provider_specific_fieldsz"Failed to decode JSON from chunk: )rV   rE   r   rl   JSONDecodeErrorrO   )	r"   r   rn   r   r   r   r   r   rk   s	            r#   chunk_parserz#TritonResponseIterator.chunk_parser#  s    	KD>BHKM8<E'+$		'1-.E 99]B/D!IImR8M))M59K(!'+)A  ## 	KA%IJJ	Ks   A.A1 1"BN)rQ   rR   rS   rX   r   r   r,   r%   r#   r   r   "  s    K$ K+@ Kr%   r   )#rT   rl   typingr   r   r   r   r   r   httpxr	   r
   3litellm.litellm_core_utils.prompt_templates.factoryr   )litellm.llms.base_llm.base_model_iteratorr   )litellm.llms.base_llm.chat.transformationr   r   r   litellm.types.llms.openair   litellm.types.utilsr   r   r   r   r   r   common_utilsr   r   rG   rI   r   r,   r%   r#   <module>r      sy     < < # N O 
 7  'xE: xEv3< 3lP, PfK6 Kr%   