
    g;                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
  ej                  e      Zd Zd Zd Zd Zd Zed	k(  r e        yy)
    N)setup_logger)BenchmarkRecordc                     t        j                         } | j                  ddt        d       | j                  ddt        d       | j                  dd	t        d
       | j                  ddt        d       | j                  dddd       | j                  dddd       | j                  dt        dd       | j                  dt        dd       | j                  dt        dd       | j                  dt        dd       | j                  d t        d!d"#       | j                  d$t        d!g d%d&'       | j                  d(t        d!g d)d*'       | j                  d+t        d,d-       | j                  d.ddd/       | j                  d0t        d1d2       | j                  d3t        d d4       | j                         }t        |d5|j                  j                  d6      d7   j                  d8d9             d:|j                   d;|j                   }|j                  s||_        t        j                  |j                  d!<       |xj                  d=z  c_        |S )>Nz-b--batch-sizesz1 2)typedefaultz-s--sequence-lengthsz8 16 32 64 128 256 512z-w--warmup-runs   z-n
--num-runs  z--hf-pt-eagerF
store_truez,Benchmark in PyTorch without `torch.compile`)r   actionhelpz--hf-pt-compilez)Benchmark in PyTorch with `torch.compile`--hf-ort-dir-path zDPath to folder containing ONNX models for Optimum + ORT benchmarking)r   r   r   z--ort-msft-model-pathzAPath to ONNX model from https://github.com/microsoft/Llama-2-Onnxz --ort-convert-to-onnx-model-pathz'Path to ONNX model from convert_to_onnx--cache-dirz./model_cachez-Cache dir where Hugging Face files are stored--model-nameTzModel name in Hugging Face)r   requiredr   --precision)int4int8fp16fp32zPrecision to run model)r   r   choicesr   --device)cpucudarocmzDevice to benchmark modelsz--device-idr   zGPU device IDz	--verbosezPrint detailed logsz	--timeout
   z8Number of mins to attempt the benchmark before moving on--log-folderz'Path to folder to save logs and results
model_size/.-z./_)exist_ok<   )argparseArgumentParseradd_argumentstrint
parse_argssetattr
model_namesplitreplacer"   	precision
log_folderosmakedirstimeout)parserargslog_folder_names      h/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/llama/benchmark_all.pyget_argsr=      s   $$&F
	   (	   	   	   ;	   8	   S	   P	   *6	   <	   )	   0%   ')   	   "	   G	   6	   DD, 5 5c :2 > F FsC PQ4??+1T^^,<=O??)KK$/ 	LLBLK    c           
      `   g }d\  }}}d\  }}}	}
d}d}d}d}d}d}d	}t        |      5 }|D ]h  }|j                  d
d      }||v rt        |t        |      d        }2||v rt        |t        |      d        }N||v rd}U||v rd}\||v r,t	        |t        |      |j                  d             }|dz  }||v r't	        |t        |      |j                  d             }	||v sd|v r3t	        ||j                  d      dz   |j                  d             dz  }
nY||j                  |      t        |      z   d  j                  dd      }t        j                  |      |    d   }t	        |      dz  }
|||||||	|
gz   }|j                  |       k 	 d d d        |S # 1 sw Y   |S xY w)N)NNN)NNNNzBatch Size: zSequence Length: zto get past_key_valueszwith past_key_valuesz	Latency: zThroughput: zpeak=
r   promptz	per-token r   CPU=   z MB'"max_used_MB)
openr3   r.   lenfloatrfindfindjsonloadsappend)	device_idlog_filebase_resultsentries
batch_sizesequence_lengthstep	latency_s
latency_ms
throughputmemorybatch_patternsequence_patternprompt_step_patternper_token_step_patternlatency_patternthroughput_patternmemory_patternf
input_linelinepeakusageentrys                           r<   process_log_fileri      s   G(8%J0F-Iz:v"M*23!O'N	h1J%%dB/D$ c-&8&:!;<
!T)"%d3/?+@+B&C"D$,'4/" D(!$s?';djjo"NO	&-
#t+"4,>(?$**S/#RS
4'D= #4

3!(;djj>O#PQTXXF  		. 9C<O O QRZZ[^`cdD JJt,Y7FE"5\D0F %#(  u%M  
R NS 
R Ns   B>F#(B0F##F-c           	         dd l }|j                  | g d      }|d   j                  d      |d<   |d   j                  d      |d<   |d   j                  d      |d<   |d   j                  d      |d<   |d	   j                  d
      |d	<   |d   j                  d
      |d<   |d   j                  d
      |d<   |d   j                  d
      |d<   dd l}|j                  }t        |D cg c],  }|j                  dv s|j                   d|j                   . c}      }d}d}	|r.|d   j                  d      d   }|d   j                  d      d   }	g }
|j                         D ]y  \  }}|d   dv rt        |d   |d   d|d   ||	      }nY|d   dv r6t        |d   |d   d|d   t        j                  t        j                        }nt        |d   |d   |d   |d   dd      }|d   |j                  _        |d   |j                  _        |d   |j                  _        |d   |j                  _        |d   |j                  j&                  d<   |d   |j                  j&                  d<   |d	   |j(                  j&                  d<   |d   |j(                  _        |d   |j(                  j&                  d<   |d   |j(                  _        |
j/                  |       | t        j0                  ||
       t        j2                  |j5                  dd       |
       t6        j9                  d!| d"       y c c}w )#Nr   )Warmup RunsMeasured Runs
Model NameEngine	PrecisionDevice
Batch SizeSequence LengthStepLatency (s)Latency (ms)Throughput (tps)Memory (GB))columnsrk   r.   rl   rq   rr   rt   rK   ru   rv   rw   )onnxruntimezonnxruntime-gpuz==r   rE   rn   )optimum-ortry   rm   ro   ry   rp   )pytorch-eagerpytorch-compilepytorchrs   measure_stepenginelatency_s_meanthroughput_tps.csvz.jsonzResults saved in !)pandas	DataFrameastypepkg_resourcesworking_setsortedkeyversionr2   iterrowsr   torch__name____version__configwarmup_runsmeasured_runsrU   
seq_length
customizedmetricslatency_ms_meanmax_memory_usage_GBrP   save_as_csvsave_as_jsonr3   loggerinfo)resultsfilenamepddfr   installed_packagesiinstalled_packages_listort_pkg_nameort_pkg_versionrecordsr'   rowrecords                 r<   save_resultsr      sg   	
 
 
B( =)007B}_-44U;B,'..u5B|0188?B=)009B}N+227;B~ 23::7CB=)009B} &22$*<l*<QIk@kAEE7"QYYK	 *<l LO.q177=a@1!4::4@C G++-3x=::$L!3{#3]CMS_apF ]BB$L!3{#3YHu~~_d_p_pF %S%6K8H#h-Y\]eYfhjlnoF$'$6!&)/&:##&|#4 #&'8#9 36v;  0-0]  *69-6H!!"23),^)<&69:L6M!!"23-0-?*v-  0 '2  !1!1&'!BGL
KK#H:Q/0K 	ms   +L<?L<c                 l   | dt         j                   j                         dd}t        j                  j	                  | j
                  |      }t        |d      5 }t        j                  |||      }	 |j                  | j                         d d d        t        j                  d       | j                  | j                   | j"                  || j$                  | j&                  g}t)        | j*                  ||      }|S # t        j                  $ r |j                          Y w xY w# 1 sw Y   xY w)Nr'   %Y-%m-%d_%H:%M:%Sz.logw)stdoutstderrz Gathering data from log files...)datetimenowr6   pathjoinr5   rI   
subprocessPopenwaitr8   TimeoutExpiredkillr   r   r   num_runsr1   r4   deviceri   rQ   )	r:   benchmark_cmdr   log_filenamelog_pathrR   processrS   r   s	            r<   	benchmarkr   $  s    XQx004467HINLww||DOO\:H	h	""=(S	LL& 
 KK23$$dmmT__fdnn^b^i^ijLt~~xFGN (( 	LLN		 
	s*   D*6D#D'$D*&D''D**D3c                  	   t               } t        | j                         t        j	                  | j
                         dt        j                  j                  _	        g }t        | j                        t        j                  d<   | j                  rdddddd| j                  d	| j                   d
| j"                  d| j$                  d| j&                  dt        | j(                        dt        | j*                        d| j,                  d| j.                  dg}t        j	                  d       t        | |d      }|j1                  |       | j2                  rdddddd| j                  d	| j                   d
| j"                  d| j$                  d| j&                  dt        | j(                        dt        | j*                        d| j,                  d| j.                  dg}t        j	                  d       t        | |d      }|j1                  |       | j4                  rdddddd| j4                  d| j                  d	| j                   d
| j"                  d| j$                  d| j&                  dt        | j(                        dt        | j*                        d| j,                  d| j.                  dg}t        j	                  d       t        | |d      }|j1                  |       | j6                  rdddddd| j6                  d| j                  d	| j                   d
| j"                  d| j$                  d| j&                  dt        | j(                        dt        | j*                        d| j,                  d| j.                  g}t        j	                  d       t        | |d      }|j1                  |       | j8                  rdddddd| j8                  d| j                  d	| j                   d
| j"                  d| j$                  d| j&                  dt        | j(                        dt        | j*                        d| j,                  d| j.                  g}t        j	                  d       t        | |d       }|j1                  |       | j:                   d!| j                    d!t<        j<                  j?                         d"d#}tA        |t        jB                  jE                  | j,                  |             y )$NTCUDA_VISIBLE_DEVICESpythonz-mzmodels.llama.benchmarkz--benchmark-typezhf-pt-eagerr   r   r   r	   r   r
   r   r!   r   z--authz'Benchmark PyTorch without torch.compiler{   zhf-pt-compilez$Benchmark PyTorch with torch.compiler|   zhf-ortr   z Benchmark Optimum + ONNX Runtimerz   zort-msftz--ort-model-pathz)Benchmark Microsoft model in ONNX Runtimezort-convert-to-onnxz/Benchmark convert_to_onnx model in ONNX Runtimery   r'   r   r   )#r=   r   verboser   r   __dict__r   backendscudnnr   r-   rQ   r6   environhf_pt_eagerr1   r4   batch_sizessequence_lengthsr   r   r   r5   	cache_dirextendhf_pt_compilehf_ort_dir_pathort_msft_model_pathort_convert_to_onnx_model_pathr"   r   r   r   r   r   )r:   all_resultsr   r   csv_files        r<   mainr   6  sc   :D
KK%)ENN"K),T^^)<BJJ%& $OONN !!KK  !OONN1
4 	=>D-A7# $OONN !!KK  !OONN1
4 	:;D-1BC7# $  OONN !!KK  !OONN5
8 	67D-?7# $$$OONN !!KK  !OONN3
6 	?@D-<7# **$!//OONN !!KK  !OONN3
6 	EFD-?7#//"!DNN#31X5F5F5J5J5LM^4__cdHbggll4??HEFr>   __main__)r*   r   rN   loggingr6   r   r   benchmark_helperr   r   r   	getLoggerr   r   r=   ri   r   r   r    r>   r<   <module>r      si        	   ) #			8	$GT6rJ1Z$nGb zF r>   