
    gA                     t   d dl Z d dlZd dlZd dlmZ d dlmZmZmZ d dl	Z	d dl
Z
d dlZd dlmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZmZ d d
lmZ  e j6                  e      Z G d dej<                  j>                        Z  G d dej<                  j>                        Z! G d d      Z" G d d      Z#y)    N)Path)ListOptionalUnion)
TypeHelper)	OnnxModel)PastKeyValuesHelper)T5EncoderInputs)torch_onnx_export)	MT5ConfigT5Config)InferenceSessionc            
            e Zd ZdZ	 ddej
                  j                  dej
                  j                  deee	f   de
e   f fdZdej                  dej                  d	ej                  fd
Z xZS )T5DecoderInitz~A T5 decoder with LM head to create initial past key values.
    This model is only called once during starting decoding.
    decoderlm_headconfigdecoder_start_token_idc                     t         |           || _        || _        || _        ||n| j                  j
                  | _        t        | j                  d      r| j                  j                  | _        y d| _        y Ntie_word_embeddingsT)super__init__r   r   r   r   hasattrr   )selfr   r   r   r   	__class__s        b/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/t5/t5_decoder.pyr   zT5DecoderInit.__init__!   sr     	&<&H"dkkNpNp 	# 07t{{DY/ZDKK++ 	 `d 	     decoder_input_idsencoder_attention_maskencoder_hidden_statesc                    |N|j                   d   }t        j                  |dft        j                  |j                        | j
                  z  }| j                  |||dd      }|j                  }|j                  }| j                  r|| j                  j                  dz  z  }| j                  |      }t        j                  |      \  }	}
||	|
fS )Nr      dtypedeviceT)	input_idsr!   r    	use_cachereturn_dict      )shapetorchoneslongr&   r   r   last_hidden_statepast_key_valuesr   r   d_modelr   r	   group_by_self_or_cross)r   r   r    r!   
batch_sizedecoder_outputssequence_outputpresent_key_values	lm_logits	past_self
past_crosss              r   forwardzT5DecoderInit.forward3   s     $/55a8J

O**188
 --.  ,,'"7#9 ' 
 *;;,<<##-1D1Dd1JKOLL1	 3 J JK] ^	:)Z//r   N)__name__
__module____qualname____doc__r,   nnModuler   r   r   r   intr   TensorFloatTensorr:   __classcell__r   s   @r   r   r      s     15

 
 h	)*	

 !)
$!0 <<!0 !&!0  %00	!0r   r   c                   (     e Zd ZdZ fdZd Z xZS )	T5Decoderz-A T5 decoder with LM head and past key valuesc                     t         |           || _        || _        || _        t        | j                  d      r| j                  j                  | _        y d| _        y r   )r   r   r   r   r   r   r   )r   r   r   r   r   s       r   r   zT5Decoder.__init__Z   sQ    /6t{{DY/ZDKK++ 	 `d 	 r   c                    | j                   j                  }t        j                  ||      }|j	                  d      }| j                  ||||dd      }|j                  }|j                  }	| j                  r|| j                   j                  dz  z  }| j                  |      }
t        j                  |	      \  }}|
|fS )N   T)r'   r0   r!   r    r(   r)   r*   )r   num_decoder_layersr	   group_by_layer	unsqueezer   r/   r0   r   r1   r   r2   )r   r   r    pastrL   r0   dummy_encoder_hidden_statesr4   r5   r6   r7   present_self_s                r   r:   zT5Decoder.forwardc   s    ![[;;-<<TCUV '=&F&Fq&I#,,'+"=#9 ' 
 *;;,<<##-1D1Dd1JKOLL1	-DDEWXa ,&&r   )r<   r=   r>   r?   r   r:   rE   rF   s   @r   rH   rH   W   s    7
'r   rH   c                   z    e Zd Z	 ddZe	 	 ddeeef   dededede	j                  ded	efd
       ZdefdZd Zy)T5DecoderInputsNc                 .    || _         || _        || _        y r;   )r   r    r0   )r   r   r    r0   s       r   r   zT5DecoderInputs.__init__   s     4E8N#]lr   r   r3   encode_sequence_lengthpast_decode_sequence_lengthr&   float16use_int32_inputsc                    | j                   }| j                  }| j                  }	| j                  }
d}t	        j
                  d|	dz
  ||f|rt        j                  nt        j                  |      }t        j                  |||	||      }|rt        j                  nt        j                  }|dkD  r||||
g}||||
g}g }t        d|z        D ])  }|j                  t	        j                  |||             + t        d|z        D ])  }|j                  t	        j                  |||             + nd}t        ||j                   |      S )aZ  Create dummy inputs for T5Decoder.

        Args:
            decoder: decoder
            batch_size (int): batch size
            encode_sequence_length (int): sequence length of input_ids for encoder
            past_decode_sequence_length (int): past sequence length of input_ids for decoder
            device (torch.device): device of output tensors
            float16 (bool): whether the model uses float32 or float16 in input
            use_int32_inputs(bool): whether use int32 instead of int64 for some inputs

        Returns:
            T5DecoderInputs: dummy inputs for decoder
        r#   r   )lowhighsizer%   r&   )rY   rK   r$   N)	num_headsrL   
vocab_sized_kvr,   randintint32int64r
   create_dummyrX   float32rangeappendrandrT   attention_mask)r   r3   rV   rW   r&   rX   rY   num_attention_heads
num_layersr_   	head_sizesequence_lengthr   encoder_inputs
float_typeself_attention_past_shapecross_attention_past_shaperO   rR   s                      r   rd   zT5DecoderInputs.create_dummy   sR   0 $*#3#3 33
 ++
  	 !MMao."25;;
 )55"-
 '.U]]5==
&*#+	)% #&	*& D1z>*EJJ'@
[abc + 1z>*EJJ'A\bcd + D0.2O2OQUVVr   returnc                     | j                   | j                  g}| j                  r|j                  | j                         |S r;   )r   r    r0   extend)r   
input_lists     r   to_listzT5DecoderInputs.to_list   s@    ""''

 d223r   c                    | j                   r8| j                   D cg c]"  }|j                  t        j                        $ c}nd }t	        | j
                  j                         | j                  j                         |      S c c}w )N)r%   )r0   tor,   re   rT   r   cloner    )r   prO   s      r   to_fp32zT5DecoderInputs.to_fp32   sr    LPL`L`43G3GH3Ga5==)3GHfj""((*''--/
 	
 Is   'Br;   )FF)r<   r=   r>   r   staticmethodr   r   r   rB   r,   r&   boolrd   r   rv   r{    r   r   rT   rT      s    
 	m  !&IWh	)*IWIW !$IW &)	IW
 IW IW IW IWV 
r   rT   c                       e Zd Ze	 	 	 ddeeef   dej                  de	de
de
de
fd       Zedefd	       Ze	 dd
eeef   dedej                  de
def
d       Zy)T5DecoderHelperr   r&   onnx_model_pathverboseuse_external_data_formatrY   c                    t        | t        t        f      sJ t        j	                  | j
                  ddt        | t              rdnd||      }|j                         }| j
                  j                  }t        j                  |d      }	t        j                  |d      }
|
d	d|z   }t        | t              r|	ng }t        | t              r|n|
}d
g|}dg}|j                  d       |j                  |       ddiddddddddid}|D ]  }dd|v rdndd||<    |D ]/  }d|v r	ddd||<   t        | t              r	ddd||<   )ddi||<   1 t        |      j                  j                  dd       t        j                          5 }t"        j$                  j'                  |d      }t        |      j                  j                  dd       t)        | t+        |      |r|n|d|||dd||       |r0t-        j.                  |d      }t1        j2                  ||dd       d	d	d	       y	# 1 sw Y   y	xY w)a  Export decoder to ONNX

        Args:
            decoder (Union[T5Decoder, T5DecoderNoPastState]): decoder object
            device (torch.device): device of decoder object
            onnx_model_path (str): onnx path
            verbose (bool, optional): print verbose information. Defaults to True.
            use_external_data_format (bool, optional): use external data format or not. Defaults to False.
            use_int32_inputs (bool, optional): use int32 inputs
        rK         r   )r3   rV   rW   r&   rY   F)presentTNlogitsr'   r    r3   rV   )r   r#   )r'   r    r!   r   r   rW   )r   rK   crosszpast_decode_sequence_length + 1)parentsexist_okzdecoder.onnx   )
argsfexport_paramsinput_namesoutput_namesdynamic_axesopset_versiondo_constant_foldingr   r   )load_external_data)save_as_external_dataall_tensors_to_one_file)
isinstancerH   r   rT   rd   r   rv   rL   r	   get_past_namesrg   rt   r   parentmkdirtempfileTemporaryDirectoryospathjoinr   tupleonnx
load_modelr   save)r   r&   r   r   r   rY   inputsru   rL   
past_namespresent_namespresent_self_namesinput_past_namesoutput_present_namesr   r   r   nametmp_dir_nametemp_onnx_model_pathmodels                        r   export_onnxzT5DecoderHelper.export_onnx   s~   & 'I}#=>>> --NN#$-7-KQR- . 
 ^^%
$^^>>(778JTYZ
+::;MW[\*+CQ1C-CD)3GY)G:R5?5S1Yf 8#78 #m34+, < +7;S&T)5:R%S<
 %D4:dN0H`"L % )D$)5:R%ST"gy1'<*L& <*L& ) 	_$$**4$*G((*l#%77<<n#M %&--33D43P:&*B&"')) $()A ((<QUV#*.,0	' +**s   BH77I r   c                    t         j                  d       t        j                  |j                  j                         j                               t        j                  |j                  j                         j                               d}|j                  rt        |j                        dz  dk(  sJ t        t        |j                        dz        }t        j                  |      }t        |j                        D ]<  \  }}t        j                  |j                         j                               |||   <   > | j                  d|      }|S )zRun inference of ONNX model.zstart onnxruntime_inference)r'   r       r   N)loggerdebugnumpyascontiguousarrayr   cpur    r0   lenrB   r	   r   	enumeraterun)ort_sessionr   
ort_inputsrk   r   ipast_tensorort_outputss           r   onnxruntime_inferencez%T5DecoderHelper.onnxruntime_inference_  s    	23 001I1I1M1M1O1U1U1WX&+&=&=f>[>[>_>_>a>g>g>i&j


 !!v--.2a777S!7!781<=J,;;JGJ"+F,B,B"C;,1,C,CKOODUD[D[D],^
:a=) #D "oodJ7r   r   r   	max_casesc                 H   t        j                  |d      dk(  }g d}g }|d| D ]l  \  }}	}
t        | t              rd}
t        j                  | j                  ||	|
|||      }|j                         j                         }t        j                         5   | | }ddd       t        j                  ||      }| j                  j                  }t        j                  t        j                   d   j#                         j                         |d   z
              }|}t$        j'                  d|        t)        d|z        D ]|  }t        j                  t        j                   |d	   |   j#                         j                         |d	|z      z
              }t$        j'                  d
| d|        t+        ||      }~ t        | t              rt)        d|z        D ]  }t        j                  t        j                   |d   |   j#                         j                         |d	d|z  z   |z      z
              }t$        j'                  d| d|        t+        ||      } |j-                  |       t$        j/                  d||	|
|       o S # 1 sw Y   xY w)zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.past_key_self_0ztensor(float16)))r      r   )r#   rK   r   )r   r#   r#   )   r   rK   Nr   )r&   rX   rY   zlogits max_diff=rK   r#   zself attention past state z
 max_diff=zcross attention past state zUbatch_size=%s, encode_sequence_length=%s, past_decode_sequence_length=%s, max_diff=%s)r   get_input_typer   r   rT   rd   r   r{   rv   r,   no_gradr   r   rL   r   amaxabsr   r   r   rf   maxrg   info)r   r   r&   rY   r   rX   
test_casestest_cases_max_diffr3   rV   rW   r   ru   torch_outputsr   rL   max_diffmax_diff_allr   s                      r   verify_onnxzT5DecoderHelper.verify_onnxs  s    #11+?PQUffB
 
 
#	
"'%/./+$11&+!1 2 F  )113J  %z 2 ! *??VTK!&!@!@zz%))M!,<,@,@,B,H,H,J[YZ^,["\]H#LLL+H:671112 ::eiia0@0C0G0G0I0O0O0QT_`ade`eTf0f&gh9!JxjQR"<: 3
 %/q#556A$zz		-"21"5"9"9";"A"A"CkRSVWZlVlRlopRpFq"qr H LL#>qcH:!VW#&|X#>L 7  &&|4KKg&+U $d A !s   JJ!	N)TFF)r   )r<   r=   r>   r|   r   rH   r   r,   r&   strr}   r   rT   r   r   rB   r   r~   r   r   r   r      s    
 ).!&sy-/0ss s 	s
 #'s s sj ?  &  BY-.B%B B 	B
 B Br   r   )$loggingr   r   pathlibr   typingr   r   r   r   r   r,   io_binding_helperr   
onnx_modelr   past_helperr	   
t5_encoderr
   torch_onnx_export_helperr   transformersr   r   onnxruntimer   	getLoggerr<   r   r@   rA   r   rH   rT   r   r~   r   r   <module>r      s     	   ( (    (   + & 6 , (			8	$80EHHOO 80v%' %'Pf
 f
RN Nr   