
    gO                     L   d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dlm	Z	 d dl
m
Z
 d dlmZ d dlmZ d dlZd dlZd dlZd dlmZmZ e	 G d d             Ze	 G d	 d
             Z	 	 	 ddZd Zd Zd Zd Zd Zd Zd Zd Z d Z!d Z"d Z#d Z$e%dk(  r
dZ& e$        yy)    N)	dataclass)datetime)Path)Optional)generate_test_dataget_bert_inputsc                       e Zd ZU eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed<   eed	<   eed
<   eed<   eed<   eed<   y)TestSetting
batch_sizesequence_length
test_cases
test_timesuse_gpuuse_io_bindingproviderintra_op_num_threadsseedverboselog_severityaverage_sequence_lengthrandom_sequence_lengthN)__name__
__module____qualname__int__annotations__boolstr     \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/bert_perf_test.pyr
   r
   "   sI    OOOMM
IM    r    r
   c                   j    e Zd ZU eed<   eed<   eed<   eed<   eed<   ee   ed<   ee   ed<   eed<   y	)
ModelSetting
model_pathinput_ids_namesegment_ids_nameinput_mask_name	opt_levelinput_tuning_resultsoutput_tuning_results	mask_typeN)r   r   r   r   r   r   r   r   r    r!   r#   r#   3   s7    ON"3-'#C=(Nr    r#   c                    dd l }|j                  |       |rd|j                         vrt        d       |r7|dk(  rddg}n0|dk(  rddg}n&|d	k(  rg d
}n|dk(  rddg}n|dk(  rg d}nddg}ndg}|j	                         }	||	_        |j                  j                  |	_        ||j                  j                  |	_        n|dk(  r|j                  j                  |	_        nj|dk(  r|j                  j                  |	_        nI|dk(  r|j                  j                  |	_        n(|dk(  r|j                  j                  |	_        n||	_        |||	_        |j!                  | |	|      }
|r|dk(  rd|
j#                         v sJ |dk(  rd|
j#                         v sJ |d	k(  r(d|
j#                         v sJ d|
j#                         v spJ |dk(  rd|
j#                         v sWJ |dk(  r(d|
j#                         v sJ d|
j#                         v s*J d|
j#                         v sJ d|
j#                         v sJ |:t%        |      5 }|
j'                  t)        j*                  |             d d d        |
S |
S # 1 sw Y   |
S xY w)Nr   CUDAExecutionProviderzWarning: Please install onnxruntime-gpu package instead of onnxruntime, and use a machine with GPU for testing gpu performance.dmlDmlExecutionProviderCPUExecutionProviderrocmROCMExecutionProvidermigraphx)MIGraphXExecutionProviderr2   r0   cudatensorrt)TensorrtExecutionProviderr-   r0         c   )	providersr4   r7   )onnxruntimeset_default_logger_severityget_available_providersprintSessionOptionslog_severity_levelExecutionModeORT_SEQUENTIALexecution_modeGraphOptimizationLevelORT_ENABLE_ALLgraph_optimization_levelORT_DISABLE_ALLORT_ENABLE_BASICORT_ENABLE_EXTENDEDr   InferenceSessionget_providersopenset_tuning_resultsjsonload)r$   r   r   r   rG   r   tuning_results_pathr<   execution_providerssess_optionssessionfs               r!   create_sessionrV   ?   s    ++L9+;3V3V3XX N	
 u#9;Q"R#:<R"S##
 #:<R"S## $;<R"S56--/L&2L#"-";";"J"JL'0;0R0R0a0a-	!Q	&0;0R0R0b0b-	!Q	&0;0R0R0c0c-	!Q	&0;0R0R0f0f-	!R	'0;0R0R0a0a-0H-',@)**:|Ob*cGu)W-B-B-DDDD*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE#.'2G2G2IIII*g.C.C.EEEE*g.C.C.EEEE%)>)>)@@@@&%&!&&tyy|4 ' N7N ' Ns   %I66J c                     t         j                  t        j                  t         j                  t        j                  t         j                  t        j                  t         j
                  t        j                  i}||    S N)torchfloat32npfloat16int32int64longlong)
torch_typetype_maps     r!   
numpy_typerb      sJ    rzzrzzRXXR[[	H Jr    c                 6   | j                         D ci c]*  \  }}|t        j                  |      j                  |      , }}}|j                         D ci c]*  \  }}|t        j                  |      j                  |      , }}}||fS c c}}w c c}}w rX   )itemsrY   
from_numpyto)inputsoutputsdevicenamearrayinput_tensorsoutput_tensorss          r!   create_input_output_tensorsrn      s    QWQ]Q]Q_`Q_+$T5++E255f==Q_M`RYR_R_RabRa;4dE,,U366v>>RaNb.(( abs   /B/Bc           
         | j                         }|j                         D ]Z  \  }}|j                  ||j                  j                  dt        |j                        |j                  |j                                \ |j                         D ]Z  \  }}|j                  ||j                  j                  dt        |j                        |j                  |j                                \ |S Nr   )

io_bindingrd   
bind_inputri   typerb   dtypeshapedata_ptrbind_output)sessrl   rm   rq   rj   tensors         r!   create_io_bindingrz      s    "J%++-fMMv||$LLOO	
 . ',,.fMMv||$LLOO	
 / r    c                    g }g }|j                   rdnd}t        |      D ]  \  }}| j                  ||      }	|j                  |	       i }
t	        t        |            D ]  }|	|   |
||   <    t        ||
|      \  }}t        | ||      }| j                  |       t        j                         }| j                  |       t        j                         |z
  }|j                  |        ||fS )Nr5   cpu)r   	enumeraterunappendrangelenrn   rz   run_with_iobindingtimeitdefault_timer)rT   
all_inputsoutput_namestest_settingresultslatency_listri   _test_case_idrg   resultrh   irl   rm   rq   
start_timelatencys                    r!   %onnxruntime_inference_with_io_bindingr      s    GL#++VF!*:!6v\62vs<()A'-ayGLO$ * )DFGU[(\%~&w~N
 	"":.))+
"":.&&(:5G$! "7$ L  r    c                 \   t        |      dkD  r%| j                  |t        j                  |             g }g }t	        |      D ]d  \  }}t        j                         }| j                  ||      }t        j                         |z
  }	|j                  |       |j                  |	       f ||fS rp   )r   r~   randomchoicer}   r   r   r   )
rT   r   r   r   r   r   rg   r   r   r   s
             r!   onnxruntime_inferencer      s    
:L&--
";<GL!*:!6v))+
\62&&(:5vG$ "7 L  r    c                    |j                         }dt        j                  j                  |        d}|d|j                   d|j
                   dj                  dd      z  }|d|j                   d|j                   dz  }|d	|j                   d
|j                   dz  }|d|j                   d|j                   dz  }|d|j                   dz  }|d|j                   z  }|S )Nzmodel=,zgraph_optimization_level=z,intra_op_num_threads=zGraphOptimizationLevel.ORT_ zbatch_size=z,sequence_length=ztest_cases=z,test_times=zuse_gpu=z,use_io_binding=zaverage_sequence_length=zrandom_sequence_length=)get_session_optionsospathbasenamerG   r   replacer   r   r   r   r   r   r   r   )r$   rT   r   rS   options        r!   	to_stringr      s9   ..0Lbgg&&z2315F
),*O*O)PPfgs  hI  hI  gJ  JK  L  T  T%r F L3344ElFbFbEccdeeF
L334LAXAX@YYZ[[F
--..>|?Z?Z>[[\]]F
()M)M(NaPPF
'(K(K'LMMFMr    c           	      $   t        | j                  |j                  |j                  || j                  |j
                  | j                        }|j                         D cg c]  }|j                   }}t        | j                  ||      }||v rt        d|       y t        d|       g }	|j                  r=t        |j                        D ]$  }
t        ||||      \  }}|	j                  |       & n;t        |j                        D ]#  }
t!        |||      \  }}|	j                  |       % t#        j$                  |	      dz  }t'        j(                  |      }t#        j*                  |d      }t#        j*                  |d      }t#        j*                  |d      }t#        j*                  |d      }t#        j*                  |d	      }|j,                  d
|z  z  }|||||||f||<   t        dj/                  t/        |d      t/        |d                   | j0                  rt2        j4                  j7                  | j0                        }t2        j4                  j9                  |      rL|}|j;                  dd      d    dt=        j>                         jA                          d}t        d|d|d       |jC                         }tE        |d      5 }tG        jH                  ||       d d d        t        d|       y y c c}w # 1 sw Y   xY w)N)r   rQ   zskip duplicated test:zRunning test:  2   K   Z   _   r:   g     @@z,Average latency = {} ms, Throughput = {} QPS.2fz.jsonr8   r   .zWARNING:zexists, will write tozinstead.wzTuning results is saved to)%rV   r$   r   r   r(   r   r)   get_outputsrj   r   r?   r   r   r   r   extendr   r[   rk   
statisticsmean
percentiler   formatr*   r   r   abspathexistsrsplitr   now	timestampget_tuning_resultsrM   rO   dump)model_settingr   perf_resultsr   r   rT   outputr   keyall_latency_list_ir   r   
latency_msaverage_latency
latency_50
latency_75
latency_90
latency_95
latency_99
throughputoutput_pathold_output_pathtrsrU   s                            r!   run_one_testr      s     !..)>>G /6.A.A.CD.CFFKK.CLD
M,,g|
DC
l%s+	/3""//0B$I\<%!G\ ##L1	 1 //0B$9':|$\!G\##L1 1
 *+d2J ooj1Oz2.Jz2.Jz2.Jz2.Jz2.J((F_,DEJ 	L 
6==f_V[>\^deoqv^wx **ggoom&I&IJ77>>+&)O*11'1=a@A8<<>C[C[C]B^^cfK*o/FU_`((*+s#qIIc1 $*K8 +] El $#s   !LLLc                     t        j                  t        | ||||f      }|j                          |j	                          y )N)targetargs)multiprocessingProcessr   startjoin)r   r   r   r   r   processs         r!   launch_testr   <  s=    %% 
	G MMOLLNr    c                 t   |j                   t        | ||||j                          y t        j                  d      }t        j                  d      }t	        ||h      }t        dt        d|            D ]  }||vs|j                  |        |j                  d       |D ]  }t        | ||||        y )NF)logicalTr8      )reverse)	r   r   psutil	cpu_countlistr   minr   sort)	r   r   r   r   r   logical_corescandidate_threadsr   r   s	            r!   run_perf_testsr   K  s    ((4--	
 	  /I$$T2MmY781c"m,-%%$$Q' . 4( 1M<zK_` !2r    c                    t        | j                  | j                  | j                  | j                        \  }}}t        d|j                   d|j                   d|j                          t        |j                  |j                  |j                  |j                  |j                  ||||j                  |j                  | j                        }t        | |||       y )NzGenerating z samples for batch_size=z sequence_length=)r+   )r   r$   r%   r&   r'   r?   r   r   r   r   r   r   r   r   r+   r   )r   r   r   	input_idssegment_ids
input_maskr   s          r!   run_performancer   c  s    )8  $$&&%%	*&I{J 

l--..F|G^G^F__pq}  rN  rN  qO  	P $$$,,++))J =,jIr    c                     t        j                         } | j                  ddt        d       | j                  dddt        dd	       | j                  d
ddt        d       | j                  ddt        dd       | j                  dddt        dd       | j                  ddt        g ddd       | j                  ddt        dd       | j                  dddd !       | j                  d"       | j                  d#dt        d$g d%d&'       | j                  d(ddd)!       | j                  d*       | j                  d+ddd,!       | j                  d-       | j                  d.dt        d d/       | j                  d0d1dt        d d2       | j                  d3dt        d d4       | j                  d5dt        d d6       | j                  d7dt        d d8       | j                  d9d t        d:;       | j                  d<d t        d=;       | j                  d>d?d@t        dA;       | j                  dBdCdddD!       | j                  dE       | j                  dFdt        d$dG       | j                         }|S )HNz--modelTzbert onnx model path)requiredrs   helpz-bz--batch_size+zKbatch size of input. Allow one or multiple values in the range of [1, 128].)r   rs   nargsr   z-sz--sequence_lengthz maximum sequence length of inputz	--samplesF
   z!number of samples to be generated)r   rs   defaultr   z-tz--test_timesr   zJnumber of times to run per sample. By default, the value is 1000 / samplesz--opt_level)r   r8   r9   r:   r:   zZonnxruntime optimization level: 0 - disable all, 1 - basic, 2 - extended, 99 - enable all.)r   rs   choicesr   r   z--seed   zPrandom seed. Use the same seed to make sure test data is same in multiple tests.z	--verbose
store_truezprint verbose information)r   actionr   )r   z--log_severityr9   )r   r8   r9   r      z.0:Verbose, 1:Info, 2:Warning, 3:Error, 4:Fatal)r   rs   r   r   r   z	--use_gpuzuse GPU)r   z--use_io_bindingzuse io_binding)r   z
--providerzExecution provider to usez-nz--intra_op_num_threadsz>=0, set intra_op_num_threadsz--input_ids_namezinput name for input idsz--segment_ids_namezinput name for segment idsz--input_mask_namezinput name for attention maskz--input_tuning_resultsz3tuning results (json) to be loaded before benchmark)r   rs   r   z--output_tuning_resultsz1tuning results (json) to be saved after benchmarkz-az--average_sequence_lengthz)average sequence length excluding paddingz-rz--random_sequence_lengthz3use uniform random instead of fixed sequence length)r   z--mask_typezmmask type: (1: mask index or sequence length, 2: raw 2D mask, 3: key len, cumulated lengths of query and key))argparseArgumentParseradd_argumentr   r   set_defaults
parse_args)parserr   s     r!   parse_argumentsr     s=   $$&F
	DsAWX
Z   /   0   Y   i   _   (	   &
=   eLyY
&
*U<Vfg
u-
(    ,   '   )   ,    B	   !@	   #8   "B   u5
|   DKr    c                     t               } | j                  dk(  r't        dt        d| j                  z              | _        | j
                  dk  r| j                  | _        t        j                         }|j                         }t        | j                        }t        |      dk\  rt        |      dk  st        d      t        | j                  | j                   | j"                  | j$                  | j&                  | j(                  | j*                  | j,                        }|D ]  }t/        || j                  | j                  | j                  | j0                  | j2                  | j4                  | j6                  | j8                  | j:                  | j<                  | j
                  | j>                        }tA        d|       tC        |||        tE        |jG                         dd 	      }tH        jJ                  jM                  tO        | j                        jP                  d
jS                  | j0                  rdnddjM                  tE        tU        |            D cg c]  }tW        |       c}      | j                  tY        jZ                         j]                  d                  }	t_        |	dd      5 }
ta        jb                  |
dd      }d }|D ]  \  }}|je                  d      }|Fg d}|jg                  |D cg c]  }|je                  d      d    c}       |ji                  |       |D cg c]  }tS        |d       }}|jg                  |D cg c]  }|je                  d      d    c}       |ji                  |        	 d d d        tA        d|	       y c c}w c c}w c c}w c c}w # 1 sw Y   *xY w)Nr   r8   r      z batch_size not in range [1, 128]ztest settingFc                     | d   S )Nr8   r   )xs    r!   <lambda>zmain.<locals>.<lambda>R  s	    qQRtr    )r   r   zperf_results_{}_B{}_S{}_{}.txtGPUCPU-z%Y%m%d-%H%M%Szw+r   )newline	
)	delimiterlineterminatorr   )zLatency(ms)Latency_P50Latency_P75Latency_P90Latency_P95Latency_P99zThroughput(QPS)=r   zTest summary is saved to)5r   r   maxr   samplesr   r   r   Managerdictsetr   r   	Exceptionr#   modelr%   r&   r'   r(   r)   r*   r+   r
   r   r   r   r   r   r   r   r   r?   r   sortedrd   r   r   r   r   parentr   r   r   r   r   strftimerM   csvwritersplitr   writerow)r   managerr   batch_size_setr   r   r   sorted_resultsr   summary_filetsv_file
tsv_writerheadersr   perf_resultparamsvaluess                    r!   mainr  "  s   D!aTDLL%8!9:##q('+';';$%%'G<<>L)N1$^)<)C:;; 

!!""	M %
"  LLOOLLMM%%IILL((''
  	nl+|\B% %* L..0%^TN77<<TZZ(//\\EuHHfT.-A&BC&Bc!f&BCD  LLN##O4		
L 
lD"	-ZZDN
 .CYYs^F @AQ@A##G,0;<1fQ&F<MMF;Fq1773<?F;<'# !/ 
., 

$l37 D(  A =;' 
.	-s=   !N,=A	O N1!O =N6O !N;<O 1O  O	__main__)Nr9   N)'r   r  rO   r   r   r   r   r   dataclassesr   r   pathlibr   typingr   numpyr[   r   rY   bert_test_datar   r   r
   r#   rV   rb   rn   rz   r   r   r   r   r   r   r   r   r  r   __spec__r   r    r!   <module>r'     s     
   	    !       > ! ! !      "Vr ).!2! B9Ja0J:_DQ4h zHF	 r    