
    gX                     v   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlZ	d dl
Z
d dlZd dlZd dlmZmZ d dlmZ d dlmZ d dlmZmZmZ d dlmZ d dlmZmZmZ d dlZ ej@                  e!      Z"de jF                  fd	Z$de jF                  fd
Z%d Z&d Z'd Z(d Z)d Z*d Z+d Z,d Z-e!dk(  r e-        yy)    N)measure_memorysetup_logger)get_library_path)ORTModelForSpeechSeq2Seq)ProfilerActivityprofilerecord_function)trange)AutoModelForSpeechSeq2SeqWhisperConfigWhisperProcessorargsc                    	
  j                   dvrt        d       fd fd	 j                   j                   j                   j
                   j                   j                  d} j                   dk(  r|j                         D ]B  \  }}t        j                  |gd|v rt        j                  nt        j                        ||<   D  j                  r3t        j                   j                  gt        j                        |d	<    j                  r3t        j                   j                   gt        j                        |d
<    j"                  r3t        j                   j$                  gt        j                        |d<   t&        j)                  d j*                          	fd}t-         | j.                          | j.                        } j.                  r||d<   |S t&        j)                  d        j                   dk(  rdnd
 
fd}t-         ||        ||      } j                   dk(  r||d<   |S |j1                   j2                  rt4        j6                  nt4        j                   j8                        |d<    j:                  |d<   d|d<   d|d<    j                  r j                  |d<   |S )N>   orthf-orthf-pt-eagerhf-pt-compilez/Unable to auto-detect inputs for provided modelc                  p    t        j                  j                        } t        j                  |       } | S N)whisper
load_audio
audio_pathpad_or_trim)audior   s    f/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/whisper/benchmark.pyload_via_ffmpegz#get_inputs.<locals>.load_via_ffmpeg$   s,    ""4??3##E*    c                     t        j                  d      5 } t        j                  t	        | j                               t        j                        }t        j                  |g      }d d d        |S # 1 sw Y   S xY w)Nrbdtype)openr   npasarraylistreaduint8array)fr   r   s     r   load_via_numpyz"get_inputs.<locals>.load_via_numpy)   sZ    $//4(AJJtAFFH~RXX>EHHeW%E )  ) s   AA55A?)
max_length
min_length	num_beamsnum_return_sequenceslength_penaltyrepetition_penaltyr   penaltyr    decoder_input_idslogits_processortemperaturezLoad audio: c                 $    | r        S         S r    )onnx_e2er   r*   s    r   <lambda>zget_inputs.<locals>.<lambda>D   s    ^%5%XGX%Xr   audio_streamzFeature extraction: r#   ptc                 j    j                   j                  | gj                        j                  S )N)return_tensorssampling_rate)	processorfeature_extractorr=   input_features)r   r   return_types    r   r8   zget_inputs.<locals>.<lambda>P   s3    !A!A	4;M;M "B "n"r   r@   )r!   deviceinputsno_repeat_ngram_sizeTearly_stopping	use_cacheforced_decoder_ids)benchmark_type	Exceptionr+   r,   r-   r.   r/   r0   itemsr#   r(   float32int32has_decoder_input_idsr2   has_logits_processorr3   has_temperaturer4   loggerinfor   time_fnhas_audio_streamtouse_fp16torchfloat16target_devicerD   )r   rC   kvload_audio_fn
audio_dataprocessor_fnr@   r   r*   rA   s   `       @@@r   
get_inputsr^       si   "SSIJJ
 oooo^^ $ 9 9--"55F e#LLNDAq!)q.BJJbhhWF1I #%%*,((D4J4J3KSUS[S[*\F&'$$)+43H3H2IQSQYQY)ZF%&$&HHd.>.>-?rzz$RF=! KK,t/01XMD-!6!67t445J!+~ KK&'--6$DKL D,
+!*-Ne##1 %((#}}emm%--HZHZ ) F8 &*%>%>F!"#FF;'+'='=#$Mr   c                    d\  }}d\  }}| j                   dv r| j                  r| j                  n| j                  }t        j                         }t	        j
                  || j                  rt        j                  nt        j                  d      j                  | j                        }t        j                         }| j                   dk(  rt        j                  |      }n| j                   dv rt        j                         }| j                  |_        |j#                  t%                      | j&                  r]d|_        d|_        | j,                  rCt        j.                  d       t        j0                  d       nt3        d	| j                          | j                   d
k(  rt5        | j6                        t8        u r| j6                  d   n| j6                  }t5        | j6                        t8        u r| j6                  d   nd }t        j                         }t;        j
                  | j<                  |||d      }t        j                         }| j                   dk(  rwt>        jA                  d| jB                          t        j                         }t        jD                  | jB                  || j6                  g      }t        j                         }t>        jA                  d||z
   d       |S )N)NN   r   r   T)torch_dtyperF   r   >   r   r      r   Cannot recognize r   )providerprovider_optionssession_optionsuse_io_bindingr   zLoading model from )	providerszLoaded model in  s)#rH   hf_pt_model_path
model_nametimer   from_pretrainedrU   rV   rW   rK   rT   rX   compiler   SessionOptionsr   enable_profilingregister_custom_ops_libraryr   verboselog_verbosity_levellog_severity_leveltuneset_default_logger_severityset_default_logger_verbosityrI   typeexecution_providertupler   hf_ort_dir_pathrP   rQ   ort_model_pathInferenceSession)r   modelsess_options
start_timeend_timesourcerd   re   s           r   	get_modelr   h   sk   $E<%J >>*.*?*?&&T__YY[
)99)-EMM
 "T
 	 	
 99;/1MM%(E			 1	1))+(,%001A1CD<</0L,./L+yy//2003 +D,?,?+@ABBh&15d6M6M1NRW1W4**1-]a]t]t9=d>U>U9VZ_9_42215eiYY[
(88  -(
 99;e#)$*=*=)>?@YY[
$$../

 99;
KK"8j#8"9<=Lr   c                 >   t        |      t        u r|d   n|}t        |      t        u r|d   n|}t        j                  | j                        }| j
                  dk(  rt        | j                        n%t        | j                  t        j                  d      }| j                  r ||      }t        j                  |       |D ]
  } ||        | j                  dk7  rt        j                  j                  |       t!        j                          }	| j
                  dk(  rt        | j"                        n%t        | j"                  t        j                  d      }
|
D ]
  } ||        | j                  dk7  rt        j                  j                  |       t!        j                          }| j
                  dk7  rt        j                  d       d}||	z
  | j"                  z  }||z  }t        j                  d	| d
       t        j                  d| d       y )Nr   rb   r   zWarm up)filedesccpu	Benchmark z	Latency: ri   zThroughput: z qps)rx   rz   rV   rB   rX   rH   rangewarmup_runsr
   sysstdoutrr   rP   rQ   cudasynchronizerl   num_runs)r   fnrC   warmup_inputsbenchmark_inputstorch_devicewarmup_rangeoutputs_r   bench_ranger   
batch_sizelatency
throughputs                  r   rR   rR      s   !%f!6F1IFM$(LE$9vayv<< 2 23L
 %' 	dD$$3::IF  ||]#G
=  {{e

|,J %' 	dmmDMM

E 
 
  {{e

|,yy{H e#BJ*$5Gg%J
KK)G9B'(
KK,zl$/0
r   c                 0   | j                   j                          d| j                   d| j                   d|j                  j                  dd       d| dt        j                  j                         d}d }| j                   dv rt        t        j                  t        j                  gdd      5 }t        d      5   ||       d d d        d d d        j                  d	      j                  | j                  | j                   
      }t"        j$                  j'                  | j(                  | d      }t+        |d      5 }|j-                  |       d d d        |S  ||       | d}|S # 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   |S xY w)N-r   z%Y-%m-%d_%H:%M:%Sr`   T)
activitiesrecord_shapesprofile_memorymodel_inference   )group_by_stack_n)sort_by	row_limitz.logw.json)rH   lower	precisionrB   __name__replacedatetimenowr   r   CPUCUDAr	   key_averagestablept_filter_bypt_num_rowsospathjoin
log_folderr"   write)	r   r   rC   inputs_typeprefixfilenameprof	prof_datar)   s	            r   
profile_fnr      s    ##))+,Adnn-=Qt{{m1R[[M`M`adfiMjLkklmxlyyz  |D  |M  |M  |Q  |Q  |S  Te  {f  gFH>>(,,.>.C.CDTXim
 !236
 4

 %%q%9??HYHYeieueu?v	77<<F84A(C AGGI ! O 	6
 XU#O 43
 
 ! Os0   3E??	E3E?F3E<	8E??FFc                    t        j                         }t        j                  |      }|j	                  d               t
        j                  d|j	                  d        d       t        j                          t        j                  j                          t        | j                  dk7  fd| j                         t        j                   j#                          y )Ng?)intervalzCPU usage: %r   c                              S r   r6   )r   rC   s   r   r8   zmeasure_fn.<locals>.<lambda>  s	    r&zr   )is_gpufuncmonitor_type)r   getpidpsutilProcesscpu_percentrP   rQ   gccollectrV   r   empty_cacher   rB   r   r   r   flush)r   r   rC   pidprocesss    ``  r   
measure_fnr      s    
))+CnnS!G%vJ
KK+g1141@ACD JJL	JJ4;;%/7IX\XiXij JJr   c                    
 fd
 
fd}|} j                   dk(  r ||        j                  rt         ||d      } j                   dk(  r|d t        d        }j                  j
                  j                         }|dz   }t        j                  j                  |      rYt        j                  d| d	|        t        j                  |t        j                  j                   j                  |             j                  j
                  j                         }|d
z   }t        j                  j                  |      rYt        j                  d| d	|        t        j                  |t        j                  j                   j                  |             j                   j
                  j                         }|dz   }t        j                  j                  |      rYt        j                  d| d	|        t        j                  |t        j                  j                   j                  |             y t        j#                  d       t%         ||        ||      \  }}	t        j#                  dt        |d          d       t        j#                  d|	d           t'         ||       y )Nc                 ,     j                   di | }|S )Nr6   )generate)rC   predicted_idsr~   s     r   get_pred_idsz&run_hf_inference.<locals>.get_pred_ids  s    &00r   c                      |       }g }t        j                        D ]1  }|j                  j                  j	                  |d      d          3 ||fS )NTskip_special_tokensr   )r   r.   appendr>   batch_decode)rC   r   transcriptionr   r   r   s       r   gen_and_decz%run_hf_inference.<locals>.gen_and_dec  s[    $V,t001A  !<!<]`d!<!efg!hi 2m++r   r   zgen-and-decr   r   z-encoder.json	Renaming  to z-decoder.jsonz-decoder-with-past.jsonz
Evaluating PyTorch...Generated token length: r    tokensTranscription: )rH   r   r   lenencodersessionend_profilingr   r   isfilerP   warningrenamer   r   decoderdecoder_with_pastrQ   rR   r   )r   rC   r~   r   generate_fnnew_logname
new_prefixold_lognamer   r   r   s   ` `       @r   run_hf_inferencer     s   
,  Ko-F|| {FMJ(*$_G}5J--//==?K$6Kww~~k*;-tK=IJ		+rww||DOO['QR--//==?K$6Kww~~k*;-tK=IJ		+rww||DOO['QR1199GGIK$'@@Kww~~k*;-tK=IJ		+rww||DOO['QR KK)*D+v&#.v#6 M=
KK*3}Q/?+@*AIJ
KK/-"2!345t[&)r   c                 t    d fd	}fd}fd} fd} j                   dk7  r|n|} ||      } j                  rxt         ||d      }	j                         }
t        j                  d|
 d|	        t        j                  |
t        j                  j                   j                  |	             y t        j                  d	       |} j                  r ||d
      }||f}t         ||        ||      } j                   dk7  r|j                         }|d   } j                  rt        j                  d|d   d           n_ ||d   d         }t        j                  dt!        |       d        j"                  j%                  |d   d
      d   }t'        |        t)         ||       y )Nc                    t        t        d j                                     }t        | j                               }||z
  }t	        |      r#t
        j                  d|        t        d      |rj                  r| d   | d<   ||z
  }t	        |      r#|D ]  }t
        j                  d| d       | |=   j                  dk7  rj                         }| j                         D ]  \  }}	|j                  ||	        j                         D ]4  }
|j                  |
j                   j                  j"                  	       6 |S | S )
Nc                     | j                   S r   namemodel_inputs    r   r8   z?run_ort_inference.<locals>.prepare_ort_inputs.<locals>.<lambda>N  s
    ;3C3Cr   z(The following model inputs are missing: zEThere are missing inputs to the model. Please add them and try again.r+   r,   zRemoving unnecessary input 'z' from user provided inputsr   )device_type	device_id)setmapr^   keysr   rP   errorrI   ru   rQ   rB   
io_bindingrJ   bind_cpu_inputget_outputsbind_outputr   r   )rC   warmupmodel_inputsuser_inputsmissing_inputsunnecessary_inputsunnecessary_inputr   rY   rZ   outputr   r~   s              r   prepare_ort_inputsz-run_ort_inference.<locals>.prepare_ort_inputsL  s>   3CUEUEUEWXY&++-(%3~LLCNCSTUcdddii#),#7F<  )<7!"%7!:;L:MMhij,- &8
 ;;%))+J1))!Q/ '++-&&v{{W[WeWe&f .r   c                 *    j                  |        | S r   )run_with_iobinding)r   r~   s    r   with_io_bindingz*run_ort_inference.<locals>.with_io_bindingj  s      ,r   c                 ,    j                  d |       }|S r   )run)rC   r   r~   s     r   without_io_bindingz-run_ort_inference.<locals>.without_io_bindingo  s    ))D&)r   c                     j                   | v r0t        j                  | j                   k(        d   d   }| d |dz    S | S )Nr   rb   )eos_token_idr#   where)r  	first_endr   s     r   handle_outputz(run_ort_inference.<locals>.handle_outputt  sJ    &4+<+<!<=a@CI/IM**r   r   e2er   r   z
Evaluating ONNX Runtime...T)r  r   r   r   r   r   )F)rB   r   r   r   rP   r   r   r   r   r   r   rQ   ru   rR   copy_outputs_to_cpurS   r   r>   r   printr   )r   rC   r~   r  r  r  r  r   
ort_inputsr   r   ort_evaluate_inputsort_warmup_inputsort_outputsactual_outputr   s   ` `             r   run_ort_inferencer  K  s   <

 &*[[E%9/?QK#F+J|| {JF ))+;-tK=AB
		+rww||DOO[IJ KK./$yy.vdC0*=D+23j)K{{e!557a.Kok!nQ&7%89: &k!nQ&78.s=/A.B'JK33KNX\3]^_`!t[*-r   c                     | j                   dv rt        | ||       y | j                   dk(  rt        | ||       y t        d| j                          )N>   r   r   r   r   rc   )rH   r   r  rI   )r   rC   r~   s      r   run_inferencer    sQ    HHvu-				%$.+D,?,?+@ABBr   c            	      *	   t        j                         } | j                  ddt        dg d       | j                  ddt        dd	       | j                  d
dt        ddg dd       | j                  dt        dd       | j                  dt        dd       | j                  dt        dd       | j                  ddt        dd	       | j                  ddt        t        j
                  j                         rdndg d        | j                  d!d"t        d#$       | j                  d%d&t        d'$       | j                  d(d)t        d*$       | j                  d+t        d,$       | j                  d-t        d.d/       | j                  d0t        d1$       | j                  d2t        d#$       | j                  d3t        d4$       | j                  d5t        d4$       | j                  d6t        d7$       | j                  d8t        d7$       | j                  d9t        d:$       | j                  d;t        d<d=       | j                  d>t        d4d?       | j                  d@t        d7dA       | j                  dBdCdDE       | j                  dFt        dGdH       | j                  dIt        dJdK       | j                  dLdCdDE       | j                  dMt        t        j                  j                  dN      dO       | j                  dPdCdDdQR       | j                         }t        j                  j                  |j                         t	        j                   |j                         |j"                  |_        dS|j&                  v r|j"                  j)                          dT|_        |j*                  dUk(  r |j*                  dV|j,                  if|_        nE|j*                  dWk(  r6|j*                  |j,                  d4|j.                  rd4nd#dXf|_        d|_        |j&                  dYk(  r|j0                  sJ dZ       |j&                  dSk(  r|j2                  sJ d[       t5        j6                  |j8                        |_        |S )\Nz-btz--benchmark-typeT)r   r   r   r   )rx   requiredchoicesz-mz--model-namez;Hugging Face name of model (e.g. 'openai/whisper-large-v2'))rx   r   helpz-pz--precisionfp32)int8fp16r#  zePrecision for model. For ONNX models, the model's precision should be set before running this script.)rx   r   defaultr!  r"  z--hf-pt-model-pathr   zNPath to directory containing all PyTorch files (e.g. tokenizer, PyTorch model))rx   r&  r"  z--hf-ort-dir-pathzaPath to directory containing all ONNX files (e.g. tokenizer, encoder, decoder, decoder_with_past)z--ort-model-pathzPath to ONNX modelz-az--audio-pathz%Path to audio file for E2E evaluationz-dz--devicer   r   )r   r   rocm)rx   r&  r!  z-idz--device-idr   )rx   r&  z-wz--warmup-runsr   z-nz
--num-runs
   z--seed   z--sampling-ratei>  zSampling rate for audio (in Hz)z--max-lengthi  z--min-lengthz--num-beamsrb   z--num-return-sequencesz--length-penaltyg      ?z--repetition-penaltyz--no-repeat-ngram-size   z--decoder-input-idsz[]zThe forced decoder ids for generation. Format is [start token, timestamp token, language token, task token]. Default is [start token]. See `decoder_input_ids` in https://github.com/microsoft/Olive/tree/main/examples/whisper for details.z--logits-processorzLWhether to use timestamps logits processor or not (0 for false, 1 for true).z--temperaturez!Temperature value for generation.z	--profileF
store_true)r&  actionz--pt-filter-byself_cpu_time_totalz"What to filter PyTorch profiler byz--pt-num-rowsi  z.Number of rows for PyTorch profiler to displayz	--verbosez--log-folder.zFolder to cache log filesz--tunezFOnly used by ROCm EP, enable TunableOp tuning to select fastest kernel)r&  r,  r"  r   ExecutionProviderCUDAExecutionProviderr   ROCMExecutionProvider)r   tunable_op_enabletunable_op_tuning_enabler   z,Please specify a path to `--hf-ort-dir-path`z+Please specify a path to `--ort-model-path`)argparseArgumentParseradd_argumentstrrV   r   is_availableintfloatr   r   r   
parse_argsr#   randomseedmanual_seedrB   r   rH   upperry   r   ru   r{   r|   astliteral_evalr2   )parserr   s     r   r;  r;    s   $$&F
A   J   (t   ]	   p	   !	   n3Lst
**113'   }3B
oCC
lbA
sA6 )UIjk S#>
S!<
C;
0sAF
*D
.UCH
0sAF  |	   [	   0	   U<H
s,AHl   c4Fvw
U<H
S"'',,s:KRmn
U	   D IINN499	dii D###%)[[%6%6%8$99J"K""&=='+'>'>dnn@]&^D#$$(??''!%)*59YYA'D# !DK h&##S%SS#e#""Q$QQ" !--d.D.DEDKr   c                     t               } t        | j                         t        j	                  | j
                         dt        j                  j                  _	        t        j                  | j                        }t        j                  | j                        }| j                  dk7  rd| j                   n| j                  }| j                   dk(  }t#        | d|       t#        | d|       t#        | d|       t#        | dd	       t#        | d
|j$                         t        j	                  d| j&                          t)        |       }| j*                  dk(  r{t-        t/        d |j1                                     }d|v | _        t#        | dd|v        t#        | dd|v        t#        | dd|v        | j&                  g k(  r|j4                  g| _        t1        |       }t7        | ||       y )NTr   zcuda:r%  r>   rX   rU   rS   Fr  zForced decoder prompt ids: r   c                     | j                   S r   r   r   s    r   r8   zmain.<locals>.<lambda>T  s
    {7G7Gr   r9   rM   r2   rN   r3   rO   r4   )r;  r   rr   rP   rQ   __dict__rV   backendscudnn	benchmarkr   rm   rk   r   rB   r   r   setattrr  r2   r   rH   r   r   r^   rS   decoder_start_token_idr  )r   configr>   rX   rU   r~   ort_model_inputsrC   s           r   mainrM  =  s   <D
KK%)ENN"**4??;F 00AI04u0DeDNN+,$++M~~'HD+y)D/=1D*h'D$e,D.&"5"56
KK-d.D.D-EFG dOEe#s#GIYIYI[\] .2B B-/BFV/VW,.@DT.TU':J)JK!!R'&,&C&C%DD"F$&r   __main__).r4  r@  r   r   loggingr   r   rl   numpyr#   r   rV   r   benchmark_helperr   r   onnxruntime_extensionsr   optimum.onnxruntimer   torch.profilerr   r   r	   tqdmr
   transformersr   r   r   onnxruntimer   	getLoggerr   rP   	Namespacer^   r   rR   r   r   r   r  r  r;  rM  r6   r   r   <module>rZ     s     
  	  	 
      9 3 8 E E  S S 			8	$EX'' EP@H&& @F.b:$=*@U.pCNb!'H zF r   