
    g<                     $   d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlZd dlmZm	Z	m
Z
mZmZ d dlmZmZmZmZ d dlmZ d dlmZ d dlmZ d dlmZ  ej4                  d	      Zdd
Zd Zedk(  r" e       Z eej@                          ee       yy)    N)datetime)	Precisioncreate_onnxruntime_sessionget_ort_environment_variablesprepare_environmentsetup_logger)DEFAULT_TOLERANCEMODEL_CLASSESPRETRAINED_GPT2_MODELS
Gpt2Helper)version)QuantizeHelper)
AutoConfig)__version__ c                 n   t        j                         }|j                  dddt        ddj	                  t
              z          |j                  ddt        d	t        t        j                               d
dj	                  t        j                               z          |j                  ddt        t        j                  j	                  dd      d       |j                  ddt        t        j                  j	                  dd      d       |j                  dddt        d       |j                  ddddd       |j                  ddddd       |j                  d        |j                  d!t        d"dg d#d$%       |j                  d&ddd'       |j                  d(       |j                  d)d*t        t        j                  t        t              d+,       |j                  d-ddd.       |j                  d/       |j                  d0d1d2t        d3gd45       |j                  d6d2t        d3gd75       |j                  d8d9d2t        g d:d;5       |j                  d<d=dd d>?       |j                  d@dt        dAdB       |j                  dCddD       |j                  dE       |j                  dFddD       |j                  dG       |j                  dHddD       |j                  dI       |j                  dJddD       |j                  dK       |j                  |       }|S )LNz-mz--model_name_or_pathTz;Model path, or pretrained model name selected in the list: z, )requiredtypehelpz--model_classFGPT2LMHeadModelz!Model type selected in the list: )r   r   defaultchoicesr   z--cache_dir.cache_modelsz%Directory to cache pre-trained models)r   r   r   r   z
--onnx_dironnx_modelszDirectory to store onnx modelsz--test_timesd   z8Number of repeat times to get average inference latency.)r   r   r   r   z-vz--validate_onnx
store_truezValidate ONNX model)r   actionr   z-oz--optimize_onnxz'Use optimizer.py to optimize onnx model)optimize_onnxz--stager   )r         a6  Stage in generation: 1 (initial decoder), 2 (decoder), 0 (both). 1 - decode the first token when past_sequence_length is zero; 2 - decode the remaining tokens when past_sequence_length is not zero; 0 - one onnx model for both stages 1 and 2. Note that we will optimize 1 and 2 differently for best performance.)r   r   r   r   r   z	--use_gpuzuse GPU for inference)use_gpuz-pz--precisionzfPrecision of model to run. fp32 for full precision, fp16 for half precision, and int8 for quantization)r   r   r   r   z--torchscriptzuse Torchscript)torchscriptz-bz--batch_sizes+r    z
batch size)nargsr   r   r   z--sequence_lengthsz!sequence lengths (excluding past)z-sz--past_sequence_lengths)          @         zpast sequence lengthsz-rz--result_csvz$CSV file for saving summary results.)r   r   r   z--thread_numzThreads to usez--include_copy_output_latency)r   r   )include_copy_output_latencyz	--verbose)verbosez--output_torch_latency)output_torch_latencyz--disable_io_binding)disable_io_binding)argparseArgumentParseradd_argumentstrjoinr   listr
   keysospathintset_defaultsr   FLOAT32
parse_args)argvparserargss      h/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/gpt2/benchmark_gpt2.pyparse_argumentsrB   !   sy   $$&F
JTYYWmMnn   !]'')*0499]=O=O=Q3RR   S.14   S-0-   G   "   6   e,
O   eLOfg
&
!!Yu   %Sde
E*
oSsQCVbc
0   !)$   3   S"Scd
7%P\]
E:
eLI
&
05V
U3
.|T
51T"DK    c                    t        j                  t              t        j                  d      k  rt        d      t        j                  d|         | j                  t        j                  k(  r| j                  r| j                  sJ d       | j                  t        j                  k(  r| j                  rJ d       | j                  dk(  r| j                  dgk(  sJ d       t        j                  | j                   dk  rt#        j$                  d	
      n| j                          t'        t        j(                  j+                                | j,                  }| j.                  }t1        ||| j                         t2        | j4                     d   }t6        }t9        j:                  | j<                  | j>                  |      }|j;                  | j<                  ||      }t        j@                  | j                  rdnd      }|jC                  |       |jD                  dkD  }|jG                  || j<                  | j4                  d	|      }	|	d   }
t2        | j4                     d   }|jI                  |||
| jJ                  |||       | j                  s| j                  t        jL                  k7  r|	| j                  t        j                  k7  rtO        | j                        nd   }
|j                  |	d   |
| j                  t        j                  k(  |jP                  jR                  |jP                  jT                  |d	| j                         | j                  t        j                  k(  r^t        j                  d       tW        jX                  |
|	d   |       tW        jZ                  |      }t        j                  d       |	d   }
| j>                  r|j?                  |||||      }t]        |
| j                  d| j                   | jJ                        }|y |j_                  ta        | jb                        ta        | j                        ta        | jd                        || j4                        }|jg                  ||| j                  t        j                  k(        }| jh                  xs2 djk                  tm        jn                         jq                  d            }ts        |dd      5 }g d }tu        jv                  ||!      }|jy                          | jb                  D ]R  }| jd                  D ]?  }| j                  D ],  }|dkD  r
|dkD  r|dk\  sJ t        j{                  d"|||       |j}                  ||||jR                  |jT                  |jD                  |j~                  || j                  t        j                  k(  ||#      }|j_                  ||||| j4                        }	 | j                  s| j                  r|j                  ||| j                        \  }}t        |      D ]o  \  }}t        |t              r5t        j{                  d$| d%t        |       d&|d   j                          Kt        j{                  d$| d'|j                          q nd }d }| j                  r!|j                  ||| j                        \  }}n/|j                  ||||| j                  d| j                  (      \  }}| j                  r|}| j                  s6g }|D ]/  }|j                  |j                         j                                1 |j                  ||| j4                  t        | j                     t        | j                     )      r*t        j                  d*t        | j                      d+       t        j                  d,||||| j                  rd-nd|rd.nd       | j<                  | j4                  | j                  t               | j                  | j                  | j                  | j>                  |||| j                  |r|d/nd0|d/d } |j                  |        / B U 	 d d d        t        j                  d3|        |S # t        $ r% t        j                  d1d	2       Y    d d d        y w xY w# 1 sw Y   TxY w)4Nz3.1.0z/This tool requires transformers 3.1.0 or later.z
Arguments:z'fp16 requires --optimize_onnx --use_gpuzquantization only supports CPUr    r   z<past_sequence_lengths shall be 0 for stage==1 (init decoder)T)logical)r#   	cache_dir)configrF   zcuda:0cpu   )has_past
new_folderrawr!   )has_position_idshas_attention_maskfp32)auto_mixed_precisionstagezquantizing model...int8zfinished quantizing modelF)enable_all_optimizationnum_threadsr.   zbenchmark_result_{}.csvz%Y%m%d-%H%M%Sar   )modenewline)
model_namemodel_classrQ   environment_variablesgpu	precision	optimizerr#   
batch_sizesequence_lengthpast_sequence_lengthr0   torch_latencyonnxruntime_latency)
fieldnameszMRunning test for batch_size=%d sequence_length=%d past_sequence_length=%d ...)float16rM   rN   ztorch output z is tuple of size z, shape z shape )return_numpyr-   )rY   rtolatolz:Pytorch and ONNX Runtime outputs are all close (tolerance=z).zZbatch_size=%d, sequence_length=%d, past_sequence_length=%d, onnxruntime_latency=%.2f %s %sz(disable_io_binding)z, torch_latency={torch_latency}z.2fNone	Exception)exc_infozResults are saved to file )Vr   parsetransformers_versionRuntimeErrorloggerinfor\   r   FLOAT16r   r"   INT8rQ   past_sequence_lengthstorchset_num_threads
thread_numpsutil	cpu_countprint
__config__parallel_inforF   onnx_dirr   r
   rY   r   r   from_pretrainedmodel_name_or_pathr#   deviceton_layerget_onnx_pathsexport_onnxr.   r<   r4   rG   num_attention_headshidden_sizer   quantize_onnx_modelquantize_torch_modelr   get_output_shapesmaxbatch_sizessequence_lengthsget_output_buffers
result_csvformatr   nowstrftimeopencsv
DictWriterwriteheaderdebugget_dummy_inputs
vocab_sizevalidate_onnxr/   pytorch_inference
test_times	enumerate
isinstancetuplelenshaper0   onnxruntime_inference$onnxruntime_inference_with_binded_ior-   appendrH   numpycompare_outputsr	   r   writerowri   error)!r@   rF   
output_dirrY   
gpt2helperrG   modelr~   use_external_data_formatonnx_model_pathsonnx_model_pathuse_paddingsessionmax_output_shapesoutput_bufferscsv_filenamecsv_filecolumn_names
csv_writerr^   r_   r`   dummy_inputsoutput_shapesoutputsra   ivalueort_outputsort_latencycopy_outputsoutputrows!                                    rA   mainr      s-   }})*W]].  LMM
KK*TF#$~~***!!dll]4]]2~~'<<A!AAzzQ))aS0p2pp0	DOOq<P&**48VZVeVef	%


(
(
*+IJ	:t||< 0 01!4KJ''(?(?TM]M]irsF''(?(?Zc'dE \\dll(>F	HHV%~~2!00+ 1  'u-O 0 01!4K $&   T^^y/@/@@*$..T]TbTbBb3t~~+>hno  U#NNi///LL,,LL$$$!%** 	! 		
 >>Y^^+KK-...@PQW@XZrs"77>EKK34.v6O&&(* ' 
 ) %OOG  #44DD&&'D!!"  223Dfdnn`i`q`qNqrN??p&?&F&Fx||~G^G^_nGo&pL	lb	1X
  ^^HF
 **J#'#8#8,0,F,F(%>o.AFZ^_F___LLg"',	 $.#>#>",'22**))!%93D3D!D)4+6 $? $L %/$@$@",'((%MO$--1J1J5?5Q5QRWYegkgvgv5w2G] -6g,>5#-eU#;$*LL*7s:LSQVZLX`afghaiaoao`p(q%& %+LL=75;;-1X$Y -? '+G,0M227A7W7W 't84K 8B7f7f ' , . - $-2<@<\<\ 8g 84K  --+6L#'#:#:/1.9F$0$7$7

8J8J8L$M /:  *99 ' ,,0,<,<%6t~~%F%6t~~%F  :   !'&`arsw  tB  tB  bC  aD  DF  %G!" x&+0'6:6M6M2SUAN=TV +/*A*A+/+;+;%)ZZ5R5T#'<<)-)-););+/+;+;*4/>4H262I2IGTc/BZ`6A#5F  #++C0U -G $9 +) 
2J KK,\N;< % $[4@#G 
2	1B$C 
2	1s2   ;Dc?Jb, 	c, ccccc&__main__)N)!r1   r   loggingr8   r   rv   rs   benchmark_helperr   r   r   r   r   gpt2_helperr	   r
   r   r   	packagingr   quantize_helperr   transformersr   r   rl   	getLoggerrn   rB   r   __name__r@   r.    rC   rA   <module>r      s     
  	     ] \  * # <			2	DNod zDJ rC   