
    g*                         d dl Z d dlZd dlmZ d dlmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZ d dlmZmZ d d	lmZmZ d d
lmZmZ d dlmZ  e j<                  e      Z g dZ!g dZ" G d d      Z#y)    N)Path)DictListUnion)float_to_float16_max_diff)	OnnxModel)optimize_model)	T5DecoderT5DecoderHelperT5DecoderInit)	T5EncoderT5EncoderHelper)T5EncoderDecoderInitT5EncoderDecoderInitHelper)MT5ForConditionalGenerationT5ForConditionalGeneration)InferenceSession)zt5-smallzt5-basezt5-largezt5-3bzt5-11b)zgoogle/mt5-smallzgoogle/mt5-basezgoogle/mt5-largezgoogle/mt5-xlzgoogle/mt5-xxlc                      e Zd Ze	 	 d"dededededef
d       Ze	 	 	 d#dededej                  d	ed
edede	eej                  j                  f   fd       Ze	 	 	 	 d$deeeeef   dej                  dededededefd       Zeg dfdedee   fd       Ze	 	 	 d%dedededededededefd       Zedeeeeef   dedej                  defd        Zy!)&T5Helper
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 6   |}t         j                  j                  |      rt        |      j                  d   }n|j                  d      d    ||z  }|r t         j                  j                  | |      n| }t         j                  j                  ||dz         S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directorys         a/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/t5/t5_helper.pyget_onnx_pathzT5Helper.get_onnx_path   s    $ (
77==+,0177;JS!"%f
<FBGGLLZ8J	ww||IzG';<<    	cache_dirdevicemerge_encoder_and_decoder_init
model_typestate_dict_pathc                    |dk(  rt        j                  | |      }n(|dk(  rt        j                  | |      }nt        d      |r$|j	                  t        j                  |             t        |j                  |j                  |j                        }|j                         j                  |       |r=t        |j                  |j                  |j                  |j                  d      }||dS t        |j                  |j                        }	|	j                         j                  |       t!        |j                  |j                  |j                        }
|
j                         j                  |       |	||
dS )	ab  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            merge_encoder_and_decoder_init (bool, optional): Whether merge encoder and decoder initialization into one ONNX model. Defaults to True.
            is_mt5 (bool, optional): whether the model is MT5 instead of T5
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        t5)r)   mt5z only support mode_type=t5 or mt5N)decoder_start_token_id)encoder_decoder_initdecoder)encoderr3   decoder_init)r   from_pretrainedr   
ValueErrorload_state_dicttorchloadr
   r3   lm_headconfigevaltor   r4   r   r   )r   r)   r*   r+   r,   r-   modelr3   r2   r4   r5   s              r&   
load_modelzT5Helper.load_model;   s7   ( .>>?Q]fgE5 /??@R^ghE?@@!!%**_"=>EMM5==%,,G&!)#7'+$  -AWUUu||<GLLNf%(u||TL""6*"" , r(   r?   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc           	          t        | t              rt        j                  | |||||       y t        | t              rt        j                  | ||||||       y t        j                  | |||||       y )N)
isinstancer   r   export_onnxr   r   r   )r?   r*   rA   rB   rC   rD   rE   s          r&   rH   zT5Helper.export_onnxp   s     eY'''(  34&22%(  ''( r(   )SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationReluAdd
onnx_modelop_block_listc                 n   | j                         D ch c]  }|j                   }}t        |      }|j                  |      }t        j                  d| d|        | j                         j                  d   j                  }d}| j                         }||v sJ ||   }d}	|j                  dk(  r|}	t        j                  d|j                          d}
|j                  D ]  }| j                  |      }
|
 n t        |
      }t        j                  d|j                   d	|        |d
k  }n/t        j                  d|j                   d|j                          g }g }|s|	|g}|	j                  g}||||d}t        j                  d|         | j                  dddi| |S c c}w )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): . Defaults to ["SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", "Add"]
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        z	fp32 op: z
 fp16 op: r   FNMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node )keep_io_typesrN   node_block_listforce_fp16_initializersz!auto_mixed_precision parameters: use_symbolic_shape_inferT )nodesop_typeset
differenceloggerinfographoutputnameoutput_name_to_nodeinputget_initializerr   debugwarningconvert_float_to_float16)rM   rN   nodeop_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionr_   last_matmul_nodeinitializerr`   max_diffrQ   rR   
parameterss                   r&   auto_mixed_precisionzT5Helper.auto_mixed_precision   s   $ 1;0@0@0BC0Bt||0BC-(!,,[9i}J{mDE (--/66q9>> $) (<<>!%8888"#56<<8##KK=dii[IJK(88?* $ 1=HLLNtyykY[\d[efg'/$$NNJ4<<.Xabfbkbkalmn(/?/K/0M/445O +*.'?	

 	7
|DE+
++XTXZXa Ds   F2optimized_model_path
is_float16num_attention_headshidden_sizero   use_gpuc           
          ddl m} d}	|r |d      }	d|	_        t        | d|||sdnd|	d|       }
|r*|rt        j                  |
       n|
j                  d       |
j                  ||d	
       y)zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsNr/   F   )r,   	num_headsrs   	opt_leveloptimization_optionsrt   only_onnxruntime)cast_input_outputT)all_tensors_to_one_file)fusion_optionsrv   enable_skip_layer_normr	   r   ro    convert_model_float32_to_float16save_model_to_file)rA   rp   rq   rr   rs   rC   ro   rt   rv   rz   ms              r&   optimize_onnxzT5Helper.optimize_onnx   s     	1##0#6 :? 7)#7aQ!5!([	
 #--a022U2K	13Keijr(   ort_sessionc                     t        | t              rt        j                  | |||      S t        | t              rt        j                  | |||      S t        j                  | |||      S )zQCompare the result from PyTorch and OnnxRuntime to verify the ONNX model is good.)rG   r   r   verify_onnxr   r   r   )r?   r   r*   rE   s       r&   r   zT5Helper.verify_onnx  sa     eY'"..uk6K[\\e12-99%fVfgg**5+vGWXXr(   N) F)Tr/   r   )TFTF)FTF)__name__
__module____qualname__staticmethodstrboolr'   r9   r*   r   nnModuler@   r   r   r
   r   r   rH   r   r   ro   intr   r   r   rU   r(   r&   r   r      s3     	=== = 	=
 
= =8 
 04!222 2 )-	2
 2 2 
c588??"	#2 2h 
 ).&*!&$Y	=:NNO$$ $ 	$
 #'$  $$ $ $L $
AACyA AF  */%)$k$k!$k $k !	$k
 $k #'$k #$k $k $kL YY	=:NNOY%Y Y 	Y Yr(   r   )$loggingr   pathlibr   typingr   r   r   r9   float16r   rM   r   	optimizerr	   
t5_decoderr
   r   r   
t5_encoderr   r   t5_encoder_decoder_initr   r   transformersr   r   onnxruntimer   	getLoggerr   rZ   PRETRAINED_T5_MODELSPRETRAINED_MT5_MODELSr   rU   r(   r&   <module>r      s`     	  $ $  -   $ @ @ 1 T P (			8	$M v sY sYr(   