
    gN                       d dl mZ d dlZd dlZd dlmZmZ d dlm	Z	m
Z
 ddZ	 	 d	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Zddd
ZddZ	 	 	 d	 	 	 	 	 	 	 ddZddZd dZ	 	 	 	 	 	 	 	 	 	 	 	 d!dZ	 	 	 	 	 	 	 	 	 	 d"dZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 d#dZy)$    )annotationsN)
AutoConfigAutoTokenizer)InferenceSessionOrtValuec                    | j                         j                  d      dz
  }|j                  | dk(  d       |r|d d df   j                  d      }|S )N   r   )longcumsummasked_fill_	unsqueeze)attention_maskuse_past_kvposition_idss      g/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/llama/llama_inputs.pyget_position_idsr      sY    !&&(//3a7Ln115#ArE*44R8     c                   t        j                  d| j                  ||ft         j                        }t        j                  ||t         j                        }t        |d      }|dk(  r|j                         n|j                  |      }|dk(  r|j                         n|j                  |      }|dk(  r|j                         n|j                  |      }|s|||fS |||d}	|	S )Nr   lowhighsizedtyper   Fr   ort	input_idsr   r   )torchrandint
vocab_sizeint64onesr   numpyto)
configdevice
batch_sizeseq_lenenginereturn_dictr   r   r   inputss
             r   get_sample_inputsr.      s     !&*;*;:wBW_d_j_jkIZZ
G5;;GN#NFL &,u_	!),,v:NI/5^))+NDUDUV\D]N+1U?<%%'PV@WL><88 ($F
 Mr   c                   t        j                  d| j                  |dft         j                        }t        j                  ||dz   t         j                        }	t        |	d      }
t        | ||||      }|dk(  r|j                         n|j                        }|dk(  r|	j                         n|	j                        }	|dk(  r|
j                         n|
j                        }
|dk(  rt        |      nt        t        fd	|            }|st        |t              sJ ||	|
|fS ||	|
d
}|dk(  r%t        |t              sJ |j                  |       |S t        |t              sJ ||d<   |S )Nr   r
   r   r   Tr   
world_sizer   c                T    | d   j                        | d   j                        fS Nr   r
   r&   kvr(   s    r   <lambda>z0get_sample_with_past_kv_inputs.<locals>.<lambda>Y   $    "Q%((6"2BqEHHV4D!Er   r   past_key_values)r    r!   r"   r#   r$   r   get_past_kv_inputsr%   r&   flatten_past_kv_inputslistmap
isinstancedictupdate)r'   r(   r)   past_seq_lenuse_fp16r+   r,   r1   r   r   r   past_kvr-   s    `           r   get_sample_with_past_kv_inputsrD   B   sq    !&*;*;:q/Y^YdYdeIZZ
L1,<EKKPN#NEL \8XbcG &,u_	!),,v:NI/5^))+NDUDUV\D]N+1U?<%%'PV@WL U? 	w'#EwOP  '4(((><AA ($F
 '4(((g
 M '4((($+ !Mr   c                   t        j                  d| j                  ||ft         j                        }t        j                  |||z   t         j                        }t        ||dk7        }t        | ||||
      }|dk(  r|j                         n|j                        }|dk(  r|j                         n|j                        }|dk(  r|j                         n|j                        }|dk(  rt        |      nt        t        fd|            }|	st        |t              sJ ||||fS |||d}|dk(  r4t        |t              sJ |j                  |       |rt        |||      }|S t        |t              sJ ||d	<   |S )
Nr   r   r   r   r0   r   c                T    | d   j                        | d   j                        fS r3   r4   r5   s    r   r7   z7get_merged_sample_with_past_kv_inputs.<locals>.<lambda>   r8   r   r   r9   )r    r!   r"   r#   r$   r   r:   r%   r&   r;   r<   r=   r>   r?   r@    enable_past_present_share_buffer)r'   r(   r)   r*   rA   max_seq_lenrB   use_buffer_sharer+   r,   r1   r   r   r   rC   r-   s    `              r   %get_merged_sample_with_past_kv_inputsrJ   z   s    !&*;*;:wBW_d_j_jkIZZ
L7,B%++VN#NQRARTL \8XbcG &,u_	!),,v:NI/5^))+NDUDUV\D]N+1U?<%%'PV@WL U? 	w'#EwOP  '4(((><AA ($F
 '4(((g5flKXF M '4((($+ !Mr   c                   |rt         j                  nt         j                  }| j                  | j                  z  }	|s*t         j
                  j                  ||| j                        j                  |      dt        j                  t        j                  |||f      d      z  j                  |      t         j
                  j                  || j                  || j                  |	      j                  |      t         j
                  j                  || j                  || j                  |	      j                  |      t        j                  |t         j                        d}
|
S t         j
                  j                  ||| j                        j                  |      t        j                  t        j                  |||ft         j                        d      dz
  j                  t         j                        t        j                  |t         j                        d}
t        | j                        D ]  }|
j                  d| dt         j
                  j                  || j                  ||	      j                  |      d	| dt         j
                  j                  || j                  ||	      j                  |      i        |rt!        |
||      }
|
S )
Ng     r
   )kr   )x	attn_maskk_cachev_cachepos)rM   rN   rQ   k__cachev_)npfloat16float32hidden_sizenum_attention_headsrandomrandastypetriur$   num_hidden_layersarrayr#   int32ranger@   rG   )r'   r)   rA   r*   rH   rB   rI   split_kvnp_dtype	head_size
ort_inputsis               r   get_msft_sample_inputsrg      s7    &rzz2::H""f&@&@@I
GV5G5GHOOPXY"RWWRWWj+{5[-\`a%bbjjkstyy~~F44lFD^D^`ifXyy~~F44lFD^D^`ifX88L9


D + 
GV5G5GHOOPXY''"'':{K*PXZX`X`"aefgjkkss 88L9

 v//0A6NBIINN"F$>$>i%fX&6NBIINN"F$>$>i%fX&	 1 9*lT_`Jr   c                |   | j                   |z  }t        | d      r| j                  n| j                  | j                  z  }|rt
        j                  nt
        j                  }t        | j                        D cg c]6  }t        j                  |||||      t        j                  |||||      f8 }	}|	S c c}w )Nhead_dimr   )num_key_value_headshasattrri   rX   rY   r    rV   rW   ra   r^   r[   )
r'   r)   rA   rB   r1   	num_headsrd   torch_dtype_rC   s
             r   r:   r:      s    **j8I#*6:#>FDVDVZ`ZtZtDtI#+%--K v//0
 1A JJz9lI[YJJz9lI[Y	
 1   Ns   :;B9c                   i }t        |       D ]n  \  }\  }}|j                         j                         j                         |d| d<   |j                         j                         j                         |d| d<   p |S )Npast_key_values..key.value)	enumeratedetachcpur%   )r9   rC   rf   past_kpast_vs        r   r;   r;      s    G(9FF.4mmo.A.A.C.I.I.K"1#T*+060C0C0E0K0K0M"1#V,-  : Nr   c                6   i }| j                         D ]t  \  }}t        |t        j                        r|||<   &|dk(  r|j	                  t        |             F|j                         j                         j                         ||<   v |rt        |||      }|S )Nr9   )
itemsr>   rU   ndarrayr@   r;   rt   ru   r%   rG   )	pt_inputsrI   rA   rH   re   rL   vs          r   convert_inputs_for_ortr}      s     J!1a$JqM##4Q78HHJNN,224JqM " 5j,P[\
r   c                    | j                         D ]Z  \  }}d|v sd|v s|j                  \  }}}}t        j                  ||||f|j                        }	||	d |d |d |d |f<   |	| |<   \ | S )Ncacher9   r   )ry   shaperU   zerosr   )
re   rA   rH   rL   r|   r)   rl   rn   rd   new_vs
             r   rG   rG     s      "1a<,123''/J	1iHHj)[)LTUT[T[\EHIE+:+z	z=L=*9*DE!JqM # r   c                   t        t        d | j                                     }t        |j                               }||z
  }t	        |      rt        d|        t        d      ||z
  }t	        |      r
|D ]  }||=  |S )Nc                    | j                   S Nname)model_inputs    r   r7   z#verify_ort_inputs.<locals>.<lambda>$  s
    {/?/?r   z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.)setr=   
get_inputskeyslenprint	Exception)modelre   model_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputs          r   verify_ort_inputsr   "  s    s?AQAQASTULjoo'(K!K/N
>88HIJ_`` %|3
!3,- "4 r   c                   | j                         }t        t        d | j                                     }|j	                         D ]  \  }}	||vr|rfd|v sd|v r^||vr0t        j                  |	||      }
|j                  ||
       |
||<   I||   j                  |	       |j                  |||          st        j                  |	||      }
|j                  ||
        | j                         D ]d  }|j                  }|r@d|v sd|v r8|j                  dd      j                  dd      }|j                  |||          Q|j                  |||       f ||fS )Nc                    | j                   S r   r   )rf   s    r   r7   z.add_io_bindings_as_ortvalues.<locals>.<lambda>@  s    QVVr   r   r9   )device_type	device_idoutpresent)
io_bindingr   r=   r   ry   r   ortvalue_from_numpybind_ortvalue_inputupdate_inplaceget_outputsr   replacebind_ortvalue_outputbind_output)r   re   r(   r   rI   kv_cache_ortvaluesr   r   rL   r|   v_deviceoutputr   
input_names                 r   add_io_bindings_as_ortvaluesr   6  sp    !!#Js+U-=-=-?@AL  "1 L  A1Ba1G**#77vYbc..q(;(0"1%"1%44Q7..q2DQ2GH33A6U^_H**1h7% #( ##%{{$)t2CeW5==iIZ[J++D2DZ2PQ""4Vy"Q & )))r   c           
        t        | |      }d }t        j                  t        j                  t        j                  t        j
                  d}| j                         }|j                         D ]  \  }}	|j                  ||	j                  j                  |	j                  j                  dk(  rdn|	j                  j                  |t        |	j                           t        |	j                        |	j!                                |	j                  } | j#                         D ]  }
|
j$                  }|rd|v r||j'                  dd         n||   }	|j)                  ||j                  |j                  dk(  rdn|j                  |rt        j                  nt        j
                  t        |	j                        |	j!                                 |S )N)ztorch.int32ztorch.int64ztorch.float16ztorch.float32ru   r   )r   r   r   element_typer   
buffer_ptrr   r9   )r   rU   r`   r#   rV   rW   r   ry   
bind_inputr(   typeindexreprr   tupler   data_ptrr   r   r   r   )r   r-   outputsrB   rI   r(   pt_to_npr   rL   r|   r   r   s               r   add_io_bindings_as_tensorsr   c  sr    uf-FFxxxx	H !!#J188==E1aqxx~~!$qww-0.zz| 	 	
   ##%{{  I$5 4<<	+<=> 	

 	!;;%/aV\\(0"**bjj.zz| 	 	
 &" r   c           	     V   |j                   |_        |j                  |d      }|rt        j                  nt        j
                  }	t        j                  |d   |t        j                        }
t        j                  |d   |t        j                        }t        |d      }|
j                  d   }||kD  r$|
d d d |f   }
|d d d |f   }t        |d      }n||k  r|
d d d	f   j                  d	      j                  }|d d d	f   j                  d	      j                  }t        ||z
        D ]0  }t        j                  ||
f      }
t        j                  ||f      }2 t        |d      }|
j                  d   }||k(  sJ |d
k(  r|
j                         n|
|d
k(  r|j                         n||d
k(  r|j                         n|d}|d
k7  rg |d<   |
j                  \  }}| j                  }| j                   }t#        | d      r| j$                  n| j&                  | j(                  z  }t        | j*                        D ]  }t        j,                  |||r|nd	|||	      }t        j,                  |||r|nd	|||	      }|d
k(  r:|j/                  d| d|j                         d| d|j                         i       |d   j1                  ||f        d }|d
k(  rt        j,                  ||| j2                  ||	      }d|j                         i}|st        | j*                        D ]q  }t        j,                  ||||||	      }t        j,                  ||||||	      }|j/                  d| d|j                         d| d|j                         i       s ||fS )NT)paddingr   )r(   r   r   Fr   r	   r   r   r   r9   ri   rp   rq   rr   logitszpresent.)	eos_token	pad_tokenbatch_encode_plusr    rV   rW   tensorr#   r   r   r   Tra   hstack
contiguousmax_position_embeddingsrj   rk   ri   rX   rY   r^   r   r@   appendr"   )r'   	tokenizerrequested_lengthpromptr(   rB   rI   r+   encodings_dictrm   r   r   r   tokenized_lengthinput_ids_first_colattention_mask_first_colrn   r-   r)   sequence_lengthmax_sequence_lengthrl   rd   rf   past_key
past_valuer   r   present_keypresent_values                                 r   get_initial_inputs_and_outputsr     s    $--I000FN#+%--K
 ^K8u{{[I\\.1A"B6Y^YdYdeN#NFL !r***a!2"2!223	'+<,<+<(<='EJ	,	,'1o77:<<#1!Q$#7#A#A!#D#F#F '*::;A&99%EFI"\\+C^*TUN < (EJ r*//// 06Y))+I9?5.335n5;u_//1,F
 $& ! #,//J 88**I#*6:#>FDVDVZ`ZtZtDtI 6++,;;#3
 [[#3

 U?MM&qc.0C0C0E&qc0*2G2G2I $%,,h
-CD3 -6 GZ&:K:KTZbmnV..016334#kk	?If\g !&	?If\g! s$')?)?)AXaSPVCWYfYqYqYst 5 7?r   )r   ztorch.Tensorr   bool)ptF)r'   r   r(   torch.devicer)   intr*   r   r+   strr,   r   )Fr   Fr
   )r'   r   r(   r   r)   r   rA   r   rB   r   r+   r   r,   r   r1   r   )FFr   Fr
   )r'   r   r(   r   r)   r   r*   r   rA   r   rH   r   rB   r   rI   r   r+   r   r,   r   r1   r   )r'   r   r)   r   rA   r   r*   r   rH   r   rB   r   rI   r   rb   r   )r
   )
r'   r   r)   r   rA   r   rB   r   r1   r   )r9   z'list[tuple[torch.Tensor, torch.Tensor]])Fr   i   )r{   r?   rI   r   rA   r   rH   r   )re   r?   rA   r   rH   r   )r   r   re   r?   )r   r   re   r?   r(   r   r   r   rI   r   r   r?   )
r   r   r-   r?   r   r?   rB   r   rI   r   )r'   r   r   r   r   r   r   z	list[str]r(   r   rB   r   rI   r   r+   r   )
__future__r   r%   rU   r    transformersr   r   onnxruntimer   r   r   r.   rD   rJ   rg   r:   r;   r}   rG   r   r   r   r    r   r   <module>r      s   #   2 2(   	
  P +++ + 	+
 + + + +~ "222 2 	2
 2 2 2 2 2 2 2l000 0 	0
 0 0 0 0j #	  	0	((*(*(* (* 	(*
 (* (*Z,,%),48,DH,\`,`ddd d 	d
 d d d dr   