
    gY                        d dl Z d dlZd dlmZ d dlmZmZmZ d dlZ	d dl
Z
d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZ d dlmZmZ d dl m!Z!m"Z" d dl#m$Z$  e jJ                  e&      Z'g dZ( G d d      Z)y)    N)Path)DictTupleUnion)float_to_float16_max_diff)	OnnxModel)optimize_model)version)WhisperConfigWhisperForConditionalGenerationWhisperProcessor)__version__)WhisperDecoderWhisperDecoderHelperWhisperDecoderInit)WhisperEncoderWhisperEncoderHelper)WhisperEncoderDecoderInitWhisperEncoderDecoderInitHelper)InferenceSession)zwhisper-tinyzwhisper-tiny.enzwhisper-basezwhisper-base.enzwhisper-smallzwhisper-small.enzwhisper-mediumzwhisper-medium.enzwhisper-largezwhisper-large-v2zwhisper-large-v3c                      e Zd Ze	 	 d)dededededef
d       Zedededej                  dej                  j                  fd	       Ze	 	 d*ded
ededej                  dededeeej                  j                  f   fd       Ze	 	 	 	 d+deeeeef   dej                  dededededefd       Ze	 d,dedee   fd       Ze	 	 	 	 d-dededededededededefd       Ze	 	 d.d ed!ej                  j                  dej                  d"ed#ef
d$       Zed"ed#efd%       Ze	 	 d.deded&edej                  d"ed#efd'       Zy()/WhisperHelper
output_dirmodel_name_or_pathsuffix
new_folderreturnc                 6   |}t         j                  j                  |      rt        |      j                  d   }n|j                  d      d   }||z  }|r t         j                  j                  | |      n| }t         j                  j                  ||dz         S )a  Build onnx path

        Args:
            output_dir (str): output directory
            model_name_or_path (str): pretrained model name, or path to the model checkpoint
            suffix (str, optional): suffix like "_encoder" or "_decoder_fp16" will be appended to file name. Defaults to None.
            new_folder (bool, optional): create a new directory for the model. Defaults to False.

        Returns:
            str: path of onnx model
        /z.onnx)ospathisdirr   partssplitjoin)r   r   r   r   
model_name	directorys         k/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/whisper/whisper_helper.pyget_onnx_pathzWhisperHelper.get_onnx_path,   s    $ (
77==+,0177;J#))#.r2Jf
<FBGGLLZ8J	ww||IzG';<<    	cache_dirdevicec                    ddl m}m}m} ddlm}m} d}| j                  d      d   dd }	d	\  }
}|	|v r |||	   ||      }
||	   }t        |
d
      5 }t        j                  ||      }ddd       ~
 |di d   } ||      }|j                  |d          ||j                  |       |j                  |      S # 1 sw Y   UxY w)  Load model given a pretrained name or path, then build models for ONNX conversion.

        Args:
            model_name_or_path (str): pretrained model name or path
            cache_dir (str): cache directory
            device (torch.device): device to run the model
            merge_encoder_and_decoder_init (bool, optional): Whether merge encoder and decoder initialization into one ONNX model. Defaults to True.
        Returns:
            Dict[str, torch.nn.Module]: mapping from name to modules for ONNX conversion.
        r   )_ALIGNMENT_HEADS_MODELS	_download)ModelDimensionsWhisperFr    r      N)NNrb)map_locationdimsmodel_state_dict )whisperr0   r1   r2   whisper.modelr3   r4   r%   opentorchloadload_state_dictset_alignment_headsto)r   r,   r-   r0   r1   r2   r3   r4   	in_memoryr'   checkpoint_filealignment_headsfp
checkpointr8   models                   r)   load_model_openaizWhisperHelper.load_model_openaiI   s      	A@:	'--c226qr:
+5( '
(;Y	RO.z:O/4(BBV<J )4F!34j);<=&%%o6xx )(s   B??C
model_implmerge_encoder_and_decoder_initstate_dict_pathc                 8   i }t        j                  t              t        j                  d      k\  rd|d<   t        j                  | fd|i|}|dk(  r2t
        j                  | ||      }|j                  |j                  }
}	|}n||}
}	d}|r&|j                  t        j                  |      d       t        |
|j                  ||	      }|j                         j                  |       |r t!        |	|
|j                  d||
      }||dS t#        |j$                  j                  |j                        }|j                         j                  |       t'        |j                  |j                        }|j                         j                  |       |||dS )r/   4.36.0eagerattn_implementationr,   openaiNF)strict)rJ   rH   )decoder_start_token_idrJ   rH   )encoder_decoder_initdecoder)encoderrU   decoder_init)r
   parsetransformers_versionr   from_pretrainedr   rI   rV   rU   r@   r>   r?   r   configevalrB   r   r   rH   r   )r   rJ   r,   r-   rK   rL   extra_kwargsrH   openai_modelmodel_encodermodel_decoderpassed_modelrU   rT   rV   rW   s                   r)   
load_modelzWhisperHelper.load_modelp   sx   & ==-.'--2II29L.//??@Rx^gxkwx!(::;MyZ`aL+7+?+?AUAU=M'L+0%=ML!!%**_"=e!L [gh&!)#<'+%"$  -AWUU$U[[%8%8%,,GGLLNf%-emmU\\JL""6*"" , r+   rH   onnx_model_pathverboseuse_external_data_formatuse_decoder_input_idsuse_int32_inputsc           	          t        | t              rt        j                  | ||||       y t        | t              rt        j                  | ||||||       y t        j                  | |||||       y N)
isinstancer   r   export_onnxr   r   r   )rH   r-   rc   rd   re   rf   rg   s          r)   rk   zWhisperHelper.export_onnx   s}     e^, ,,( 89+77%(  !,,( r+   
onnx_modelop_block_listc                    t        | j                         D cg c]  }|j                   c}      }t        |      }|j                  |      }t        j                  d| d|        | j                         j                  d   j                  }d}| j                         }||v sJ ||   }d}	|j                  dk(  r|}	t        j                  d|j                          d}
|j                  D ]  }| j                  |      }
|
 n t        |
      }t        j                  d|j                   d	|        |d
k  }n/t        j                  d|j                   d|j                          g }g }|s|	|g}|	j                  g}|t        |      ||d}t        j                  d|         | j                   dddi| |S c c}w )a  Convert model to mixed precision.
           It detects whether original model has fp16 precision weights, and set parameters for float16 conversion automatically.
        Args:
            onnx_model (OnnxModel): optimized ONNX model
            op_block_list (List[str], optional): . Defaults to ["SimplifiedLayerNormalization", "SkipSimplifiedLayerNormalization", "Relu", "Add"]
        Returns:
            parameters(dict): a dictionary of parameters used in float16 conversion
        z	fp32 op: z
 fp16 op: r   FNMatMulz#Found last MatMul node for logits: z3max diff of converting weights in last MatMul node z: gư>z-Failed to find MatMul node for logits. Found z	 of node )keep_io_typesrm   node_block_listforce_fp16_initializersz!auto_mixed_precision parameters: use_symbolic_shape_inferTr:   )setnodesop_type
differenceloggerinfographoutputnameoutput_name_to_nodeinputget_initializerr   debugwarninglistconvert_float_to_float16)rl   rm   nodeop_full_setfp32_op_setfp16_op_setlogits_output_nameis_weight_fp16_precisionr}   last_matmul_nodeinitializerr~   max_diffrp   rq   
parameterss                   r)   auto_mixed_precisionz"WhisperHelper.auto_mixed_precision   s   $ J4D4D4FG4FD4<<4FGH-(!,,[9i}J{mDE (--/66q9>> $) (<<>!%8888"#56<<8##KK=dii[IJK(88?* $ 1=HLLNtyykY[\d[efg'/$$NNJ4<<.Xabfbkbkalmn(/?/K/0M/445O +!-0.'?	

 	7
|DE+
++XTXZXa Hs   Goptimized_model_path
is_float16num_attention_headshidden_sizer   use_gpuproviderc	           
          ddl m}	  |	d      }
d|
_        |dk(  |
_        t	        | d|||sdnd|
|d	      }|r*|rt
        j                  |       n|j                  d
       |j                  ||d       y)zHOptimize ONNX model with an option to convert it to use mixed precision.r   )FusionOptionsbartTrocm   NF)
model_type	num_headsr   	opt_leveloptimization_optionsr   only_onnxruntime)cast_input_output)all_tensors_to_one_file)	fusion_optionsr   use_multi_head_attention!disable_multi_head_attention_biasr	   r   r    convert_model_float32_to_float16save_model_to_file)rc   r   r   r   r   re   r   r   r   r   r   ms               r)   optimize_onnxzWhisperHelper.optimize_onnx  s     	1,V48<5AIVAS>)#7aT!5"	
 #221522U2K	13Keijr+   	processorpt_model
batch_sizeprompt_modec           
         	 ddl m} ddl m}  |d	d
d      }g }	|dk(  r | |d   d   d   gd      j                  }
ni | |d   d   d   gd      j                   | |d   d   d   gd      j                  g}	t        |	      |k(  sJ t        j                  |	d   |	d   f      }
d\  }}}}d\  }}|
j                  |      ||||||ddd	}|rddg}|D cg c]  }| j                  |       }}g }g }t        |      D ]  }t        j                  ||         |d<   |	|   j                  |      |d<    |j                   di |j#                         j%                         j'                         }|j)                  |       |j)                  | j+                  |d      d           |
|d<   |d= n`g } |j                   di |j#                         j%                         j'                         }| j+                  |d      d   g}t-        |      }|d= |d= ||||fS # t        $ rU}t        j	                  d| d       d}t        j                  d| d       t        j                  |       Y d }~qd }~ww xY wc c}w )Nr   )load_datasetz.An error occurred while importing `datasets`: T)exc_infozpip install datasetszCCould not import `datasets`. Attempting to install `datasets` via `z`.z)hf-internal-testing/librispeech_asr_dummyclean
validation)r%      audioarraypt)return_tensors   )   r   r   r   )      ?r   )	input_features
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyearly_stopping	use_cachezJohn has doubtszMaria has grave doubts
prompt_idsr   skip_special_tokensr   r   r:   )datasetsr   	Exceptionrx   errorr   r!   systemr   lenr>   catrB   get_prompt_idsrange
from_numpygeneratedetachcpunumpyappendbatch_decoder   )r   r   r-   r   r   r   einstall_cmddsinput_features_r   r   r   r   r   r   r   inputspromptspr   pt_transcription
pt_outputsi	pt_outputs                            r)    pt_transcription_for_verify_onnxz.WhisperHelper.pt_transcription_for_verify_onnx<  s   	#- 	*EwVbc?&1gw(?'@QUVeeN 2a5>'23DIXX2a5>'23DIXXO ':555"YY(:OA<N'OPNBM?
J	+?-5**,//7$$"$8,"4"

 (*BCG?FGw!)2215wJG!J :&','7'7
1'F|$+:1+=+@+@+H'(-H--77>>@DDFLLN	!!), ''	(>(>y^b(>(cde(fg ' (6F#$|$J***4V4;;=AACIIKJ ) 6 6zW[ 6 \]^ _`j)J#$;'Z??s  	#LLI!MX\L]0KNN`al`mmopqIIk""		#H Hs   H  J 	I>)A
I99I>c                 J    | dkD  r|rd}d}d}d}||||h}|S d}d}d}	|||	h}|S )	Nr   z{ John has doubts whether Sir Frederick Layton's work is really Greek after all and can discover in it but little of Rocky Izy John has doubts whether Sir Frederick Latins work is really Greek after all and can discover in it but little of Rocky Iz Maria has grave doubts whether Sir Frederick Layton's work is really Greek after all and can discover in it but little of Rockyz Maria has grave doubts whether Sir Frederick Latins work is really Greek after all and can discover in it but little of Rocky IzX Mr. Quilter is the apostle of the middle classes and we are glad to welcome his gospel.zY Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.zZ "Mr. Quilter is the apostle of the middle classes, and we are glad to welcome his gospel.r:   )
r   r   'expected_transcription_no_comma_prompt1)expected_transcription_misspelled_prompt1'expected_transcription_no_comma_prompt2)expected_transcription_misspelled_prompt2expected_transcription_optionsexpected_transcription_no_comma!expected_transcription_with_comma+expected_transcription_with_quote_and_commas
             r)   select_transcription_optionsz*WhisperHelper.select_transcription_options  s    
 >k 7t3 9t5 7y3 9{57799	.** .- k , l . m 8 01;.*
 .-r+   ort_sessionc           	      "
   i }t        j                  t              t        j                  d      k\  rd|d<   t        j                  | fd|i|j                  |      }t        j                  | |      }t        j                  | |      }	t        j                  |||||      \  }
}}}|	j                  g}|j                  dd	      }t        t        d
 |            }||z   }t        t        d |j                                     }t        t        d |j                                     }t        j                   t        j"                  t        j$                  t        j&                  t        j(                  t        j*                  d}d|v }t-        ||      D ]5  \  }}|dk(  r3|
|   j/                         j1                         j3                         |
|<   ?|dk(  r(t        j4                  |	j6                  ||         |
|<   l|dk(  r*t        j4                  ||	j6                  f||         |
|<   |dk(  r|s&|r|gn|g}t        j8                  |||         |
|<   g }t;        |      D ]$  }|j=                  ||   j?                                & tA        d |D              }g }|D ]6  }g ||	jB                  g|tE        |      z
  z  }|j=                  ||z          8 t        j8                  |||         |
|<   j|dk(  r t        j8                  dg||         |
|<   |dk(  r"t        j8                  ddgg||         |
|<   |dk(  r5t        jF                  t        j8                  |g||         |d      |
|<   |dk(  r t        j8                  dg||         |
|<   t        j8                  |
|   g||         |
|<   8 |jI                  d|
      d   dddddf   }|jK                  |d      }t        jM                  ||      } d}!t;        |      D ]  }|!||   | v xr ||   | v z  }! d}"|!st;        |      D ]  }||   jN                  ||   jN                  k7  r!||   ||   dddtE        ||         f   z
  }#n||   ||   z
  }#tA        |#jQ                         |#jA                         tR              }$tA        |"|$      }" |"dk7  r0tT        jW                  d|        tT        jW                  d |        |"S )!zRCompare the result from PyTorch and ONNX Runtime to verify the ONNX model is good.rN   rO   rP   r,   )r,   )r   r   english
transcribe)languagetaskc                     | d   S )Nr   r:   )tokens    r)   <lambda>z+WhisperHelper.verify_onnx.<locals>.<lambda>  s    E!Hr+   c                     | j                   S ri   )r|   entrys    r)   r   z+WhisperHelper.verify_onnx.<locals>.<lambda>  s    5::r+   c                     | j                   S ri   )typer   s    r)   r   z+WhisperHelper.verify_onnx.<locals>.<lambda>  s    EJJr+   )ztensor(float)ztensor(float16)ztensor(int64)ztensor(int32)ztensor(int8)ztensor(uint8)extra_decoding_idsr   
vocab_mask)dtypeprefix_vocab_maskdecoder_input_idsc              3   2   K   | ]  }t        |        y wri   )r   ).0r   s     r)   	<genexpr>z,WhisperHelper.verify_onnx.<locals>.<genexpr>  s     !>+Q#a&+s   logits_processorr   cross_qk_layer_headr   temperaturer   NTr   )keyzPyTorch outputs: zONNX Runtime outputs: ),r
   rX   rY   r   rZ   rB   r   r   r   r   rS   get_decoder_prompt_idsr   map
get_inputsnpfloat32float16int64int32int8uint8zipr   r   r   ones
vocab_sizer   r   r   tolistmaxpad_token_idr   repeatrunr   r   shapeminabsrx   r   )%r   r,   r   r-   r   r   r]   r   r   r[   r   r   r   decoder_prompt_idsstart_idr   forced_decoder_ids	ort_names
ort_dtypes	ort_to_npuse_extra_decoding_idsr|   r   raw_input_idsort_promptsr   max_lenpadded_promptsr   padded_promptort_outputsort_transcriptionr   parityr   diff
max_diff_is%                                        r)   verify_onnxzWhisperHelper.verify_onnx  s    ==-.'--2II29L./2BB
*3
7C

"V* 	 %445GS\]	../AYWCPCqCq!# Dr D
@ *.@ 11255y|5\
#4jAB
%
25{7M7M7OPQ	#68N8N8PQR
ZZ!zzXXXXGGXX
	 "6!By*5KD%''%d|22488:@@Bt%!wwv'8'8	%@PQt,,!ww
F4E4E'FiX]N^_t,,"2HXJOaNbM#%88M5AQ#RF4L #%K":.#**+=a+@+G+G+IJ /!!>+!>>G%'N(([!([0C0C/DRUVWRXHX/Y([&--m>P.PQ ) $&88N)EBR#SF4L++!xx9U3CDt..!xx!Q	%8HIt--!yy:,iPUFV)WYcefgt&!xxYu5EFt!xxi>NOtG 6H "oodF3A6q!Qw?%22;TX2Y)6)S)ST^`k)l&z"A #'EE K%a(,JJF #
 :&a=&&+a.*>*>>%a=;q>!=Qs:a=?Q=Q:Q+RRD%a=;q>9D TXXZSA
x4 ' q=NN./?.@ABNN34E3FGHr+   N) F)Tr+  )TFTF))SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationReluAdd)FTFr   )r   F)__name__
__module____qualname__staticmethodstrboolr*   r>   r-   nnModulerI   r   rb   r   r   r   r   r   rk   r   r   r   intr   r   r   r   r   r*  r:   r+   r)   r   r   +   s     	=== = 	=
 
= =8 $ $ $  $  
	$  $ L  04!888 8 	8
 )-8 8 
c588??"	#8 8t 
 ).&*!&#^^5GIbbc## # 	#
 #'#  $# # #J %
AASzA AF  */%)$k$k!$k $k !	$k
 $k #'$k #$k $k $k $kL 
 !C@#C@((//C@ C@ 	C@
 C@ C@J ... .@  !hhh &h 	h
 h h hr+   r   )*loggingr!   pathlibr   typingr   r   r   r   r  r>   r	  r   rl   r   	optimizerr	   	packagingr
   transformersr   r   r   r   rY   whisper_decoderr   r   r   whisper_encoderr   r   whisper_encoder_decoder_initr   r   onnxruntimer   	getLoggerr0  rx   PRETRAINED_WHISPER_MODELSr   r:   r+   r)   <module>rE     sf     	  % %   -   $  Y Y < T T @ c (			8	$ a ar+   