
    g(              "           d dl mZmZmZ d dlmZmZ d dlZd dlm	Z	 d dl
mZ ddlmZ g dddddddddddddddd	fd
ededee   dee   dee   dee   dee   dee   dee   dee   dee   dee   dee   dee   dee   dee   f dZd Zd Zy)    )FIRST_COMPLETEDThreadPoolExecutorwait)ListOptionalN)print_verbose)get_optional_params   )handleriX  d   modelmessages	functionsfunction_calltemperaturetop_pnstream
max_tokenspresence_penaltyfrequency_penalty
logit_biasuserrequest_timeouttimeoutmax_workersc                 T   t               }|}g }| } d}| j                  dd      d   t        j                  v r*| j                  dd      d   }| j                  dd      d   } |dk(  rFt	        ||||||xs d||	|
|||| |      }t        j                  | |t        j                  |      }|S d	 }t        |
      5 } ||d      D ]z  }|D ]s  }|j                         }|j                  d       ||d<   i }d|v r|j                  d      } |j                  t        j                  fi ||}|j                  |       u | 	 ddd       g }|D ]"  }	 |j                  |j                                $ |S # 1 sw Y   4xY w# t        $ r} |j                  |        Y d} ~ Rd} ~ ww xY w)a   
    Batch litellm.completion function for a given model.

    Args:
        model (str): The model to use for generating completions.
        messages (List, optional): List of messages to use as input for generating completions. Defaults to [].
        functions (List, optional): List of functions to use as input for generating completions. Defaults to [].
        function_call (str, optional): The function call to use as input for generating completions. Defaults to "".
        temperature (float, optional): The temperature parameter for generating completions. Defaults to None.
        top_p (float, optional): The top-p parameter for generating completions. Defaults to None.
        n (int, optional): The number of completions to generate. Defaults to None.
        stream (bool, optional): Whether to stream completions or not. Defaults to None.
        stop (optional): The stop parameter for generating completions. Defaults to None.
        max_tokens (float, optional): The maximum number of tokens to generate. Defaults to None.
        presence_penalty (float, optional): The presence penalty for generating completions. Defaults to None.
        frequency_penalty (float, optional): The frequency penalty for generating completions. Defaults to None.
        logit_bias (dict, optional): The logit bias for generating completions. Defaults to {}.
        user (str, optional): The user string for generating completions. Defaults to "".
        deployment_id (optional): The deployment ID for generating completions. Defaults to None.
        request_timeout (int, optional): The request timeout for generating completions. Defaults to None.
        max_workers (int,optional): The maximum number of threads to use for parallel processing.

    Returns:
        list: A list of completion results.
    N/   r   vllmF)r   r   r   r   r   r   stopr   r   r   r   r   r   custom_llm_provider)r   r   custom_prompt_dictoptional_paramsc              3   V   K   t        dt        |       |      D ]  }| |||z      yw)z)Yield successive n-sized chunks from lst.r   N)rangelen)lstr   is      R/var/www/openai/venv/lib/python3.12/site-packages/litellm/batch_completion/main.pychunksz batch_completion.<locals>.chunks_   s-     1c#h*!a!en$ +s   ')r   r   r   r   kwargs)localssplitlitellmprovider_listr	   vllm_handlerbatch_completionsr#   r   copypopsubmit
completionappendresult	Exception)!r   r   r   r   r   r   r   r   r!   r   r   r   r   r   deployment_idr   r   r   r-   argsbatch_messagescompletionsr"   r$   resultsr+   executor	sub_batchmessage_listkwargs_modifiedoriginal_kwargsfutureexcs!                                    r*   batch_completionrG      s   ` 8DNKE{{31!6!66#kk#q1!4C#A&f$-'#?U!-/! 3
" 00#&99+	
L N;	%
  K8H#NC8	$-L&*iikO#''62>OJ/&(O?2*9*=*=h*G,X__**.=APF  &&v. %. 9 9" !F$v}}/ " N1 98*  $s##$s%   <BE7F7F 	F'F""F'c                     d|v r|j                  d       d|v r|d   |j                  d       i }t        t                    5 }D ]+  } |j                  t        j
                  g| d|i|||<   - t        |j                         fd      D ]0  \  }}|j                         |j                         c cddd       S  	 ddd       yd|v r*|d   }|j                  d       |j                  d       |j                  d	i       }i }t        t        |            5 }|D ]Q  }|j                         D ]  }	|	|vs||	   ||	<    i ||} |j                  t        j
                  fi |||d   <   S |rxt        d
       t        |j                         t              \  }
}t        d|
        |
D ]  }	 |j                         }|c cddd       S  t        d       t        |       |rxddd       yy# 1 sw Y   yxY w# t        $ r\ t        d       t        |       i }|j                         D ]  \  }	}||k(  rt        d|	        |||	<     |}t        d|        Y w xY w# 1 sw Y   yxY w)a  
    Send a request to multiple language models concurrently and return the response
    as soon as one of the models responds.

    Args:
        *args: Variable-length positional arguments passed to the completion function.
        **kwargs: Additional keyword arguments:
            - models (str or list of str): The language models to send requests to.
            - Other keyword arguments to be passed to the completion function.

    Returns:
        str or None: The response from one of the language models, or None if no response is received.

    Note:
        This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
        It sends requests concurrently and returns the response from the first model that responds.
    r   modelsr,   c                 ,    j                  | d         S )Nr   )index)xrI   s    r*   <lambda>z)batch_completion_models.<locals>.<lambda>   s    v||AaD/A    )keyNdeployments
model_listr-   z

 waiting for next result

)return_whenz
done list
z3

got an exception, ignoring, removing from futureszremoving keyznew futuresz 

done looping through futures

)r5   r   r'   r6   r0   r7   sorteditemsr9   keysr   r   valuesr   r:   )r<   r-   futuresr@   r   rE   rP   nested_kwargs
deploymentrO   done_r9   new_futuresvaluerI   s                  @r*   batch_completion_modelsr^      s   & &

76!

8CK8H!0&&")-"5:">D"  
 "(%A"v ==?.!==?* 98" 9t _ 
&	 ]+

=!

< 

8R0C,<=)
!;;=C:-*0+
3	 )
 9J8-8/>x&&0*00
7+, * @Aw~~/_MaD623"F!!'%) >=" #* DEg&7  >R u 9t ) % !%S &g.&(*1--/JC% -SE.B C (38C 0 +: #.%G9&=> !+ >R sW   A%H.H	H)I8A;I8HI8 I8HA"I52I84I55I88Jc                     ddl }d|v r|j                  d       d|v r|d   }|j                  d       nt        d      g }|j                  j	                  t        |            5 }t        |      D ][  \  }} |j                  t        j                  g| d|i|}|j                         =|j                  |j                                ] 	 ddd       |S # 1 sw Y   |S xY w)a  
    Send a request to multiple language models concurrently and return a list of responses
    from all models that respond.

    Args:
        *args: Variable-length positional arguments passed to the completion function.
        **kwargs: Additional keyword arguments:
            - models (str or list of str): The language models to send requests to.
            - Other keyword arguments to be passed to the completion function.

    Returns:
        list: A list of responses from the language models that responded.

    Note:
        This function utilizes a ThreadPoolExecutor to parallelize requests to multiple models.
        It sends requests concurrently and collects responses from all models that respond.
    r   Nr   rI   z'models' param not in kwargsr,   )concurrent.futuresr5   r:   rW   r   r'   	enumerater6   r0   r7   r9   r8   )	r<   r-   
concurrentrI   	responsesr@   idxr   rE   s	            r*   %batch_completion_models_all_responsesre      s    $  &

76!

8677I				.	.3v;	.	G8#F+JC$X__W%7%7V$VeVvVF}}*  1 , 
H  
H s   'AC0!CC&)r`   r   r   r   typingr   r   r0   litellm._loggingr   litellm.utilsr	   llms.vllm.completionr   r2   strfloatintbooldictrG   r^   re    rN   r*   <module>rp      sH   H H !  * - :  $#'#'!!	 $(,)-!%%) !$'qq q ~	q
 C=q %q E?q }q TNq q uoq  q q 3-q" c]#q$ c]%q& #'qlSl&rN   