
    g=E                        d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlmZ d dlmZmZ d dlmZ  ej&                  d      Zd'dZ G d d	      Zd
 Zd Zd Zd Zd ZdefdZd ZddgfdefdZd Z d Z!d Z"d Z#defdZ$e%dk(  rv e       Z& ee&jN                         e&jP                  dk  s+e&jR                  dk  se&jP                  e&jR                  z  dk  rejU                  d       ejV                  jY                  e&j                        rfe&jZ                  sZe&j\                  s e/d e&j                   d!      eja                  d"e&j                          ejb                  e&j                          ee&jP                  e&jR                  e&j                        Z2e&jZ                  s	 e$e2e&       	  ee2jf                        Z4eja                  d$       e2jf                  jq                  d%d&      Z9 ee4e9       yy# e5$ r- ejm                  d#e2jf                          e2jn                  Z4Y iw xY w)(    N)get_ort_environment_variablessetup_logger)main)PRETRAINED_GPT2_MODELS
Gpt2Helper)	OnnxModel c           
         t        j                         }|j                  dddt        ddj	                  t
              z          |j                  ddt        d	d
       |j                  ddt        dd       |j                  ddt        dd       |j                  dddd       |j                  d       |j                  dddd       |j                  d       |j                  dddd       |j                  d       |j                  ddd       |j                  d       |j                  d ddd!       |j                  d"       |j                  d#ddd$       |j                  d%       |j                  |       }|S )&Nz-mz--model_name_or_pathTz2Model path, or pretrained model name in the list: z, )requiredtypehelpz--csvFzgpt2_parity_results.csvz#path of csv file to save the result)r   r   defaultr   z--test_casesi  znumber of test cases per runz--runs(   znumber of repeated runs	--use_gpu
store_truezuse GPU for inference)r   actionr   )use_gpuz--allz'run all combinations of mixed precision)allz-e--use_external_data_format)r   r   )use_external_data_formatz	--verbose)verbosez--skip_testzEdo not run test, and only rank experiments based on existing csv file)	skip_testz--overwritezOverwrite existing csv file)	overwrite)	argparseArgumentParseradd_argumentstrjoinr   intset_defaults
parse_args)argvparserargss      e/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/gpt2/gpt2_parity.pyparse_argumentsr&      s   $$&F
ADIINdDee   )2   +   5sBMfg
eLOfg
&
6	   E"
:US_`
7
eLI
&
T	   %(
*	   %(T"DK    c                       e Zd Zd Zd Zy)
ParityTaskc                 J    || _         || _        || _        g | _        d| _        y )Nr   )
total_runs
test_casescsv_pathresultsrun_id)selfr,   r+   r-   s       r%   __init__zParityTask.__init__b   s%    $$ r'   c                    t         j                   j                         j                  d      }| d| j                   }| xj                  dz  c_        	 t	        g |d| j
                   d| j                   ||| j                        }|r| j                  j                  |       |S # t        $ r t        j                  d|        d }Y |S w xY w)Nz%Y%m%d%H%M%S_   z-tz-r)experiment_namer/   csv_filenamezFailed to run experiment )datetimenowstrftimer/   r   r,   r+   r-   r.   append	Exceptionlogger	exception)r0   r"   r5   
start_timer/   results         r%   runzParityTask.runi   s    &&**,55nE
<q.q	O$OO 1ODOT__<MO /!]]	F ##F+
 	  	88IJKF		s   AB- -#CCN)__name__
__module____qualname__r1   r@    r'   r%   r)   r)   a   s    r'   r)   c                     g }dd l }t        | d      5 } |j                  |      }|D ]  }|j                  |        	 d d d        |S # 1 sw Y   |S xY w)Nr   r	   newline)csvopen
DictReaderr:   )r-   rowsrH   csvfilereaderrows         r%   load_results_from_csvrO   ~   sU    D	h	#w(CKK  
$ K	 
$ Ks   +A

Ac                 h    | D ]#  }|j                  d      st        | |         c S  t        d      )Nzaverage_latency(batch_size=z)Failed to get average_latency from output)
startswithfloatRuntimeError)rN   names     r%   get_latencyrU      s6    ??89T##  B
CCr'   c                 r    t        |       }t        | d         }t        | d         }|dz  |dz  z
  |dz  z
  S )z@Scoring function based on 3 metrics. The larger score is better.top1_match_rateonnx_size_in_MBi  
   d   )rU   rR   )rN   latency_in_msrW   rX   s       r%   scorer\      sK    $MC 123OC 123OT!MB$6639NNNr'   c                 (  
 t                t        d       i 
|D ]
  }|
|d   <    t        t        | j                         
fdd            }t        j                  | d|        t        j                  d| d       d	}d
}t        |j                               D ]p  \  }\  }}	|	|k7  r|}|	}|D ]Z  }|d   |k(  st        j                  dj                  ||	|t        |      t        |d         |d   |d   t                             p r y )Nz
**********r/   c                 0    | d   t        | d            fS )Nr4   r   )r\   )itemrow_maps    r%   <lambda>zprint_wins.<locals>.<lambda>   s    d1guWT!W-='>?r'   T)keyreversez Wins:z	Based on z* wins and a scoring function, the ranking:r   za{:02d}: WINs={:02d}, run_id={}, latency={:5.2f}, top1_match={:.4f}, size={}_MB, experiment={}, {}rW   rX   
experiment)printdictsorteditemsr<   debuginfo	enumerateformatrU   rR   r   )winsrK   	test_namerN   sorted_winsrankprevious_valuecountrb   valuer`   s             @r%   
print_winsru      s&   	G	(OG!$H  JJL?	
K LLI;f[M23
KK)I;&PQRDN():):)<=|UN"DC8}#w~~#C(c"345-.L)57	    >r'   c                    i }i }| D ]  }|d   }d||<   d||<    t        |dd      5 }g d}t        j                  ||      }|j                          g d}	t	        |       }
t        |
d	z
        D ]  }| |   }t        |d
   t              rt        j                  |d
         }n|d
   }t        |d	z   |
d	      D ]o  }| |   }d}|	D ]  }||   ||   k7  sd} n |s$t        |d
   t              rt        j                  |d
         }n|d
   }	 t        j                  j                  ||dd      \  }}t        j                  j                  ||dd      \  }}|C|dk  r>t        |d         t        |d         kD  r||d   xx   d	z  cc<   n||d   xx   d	z  cc<   |dk  r>t        |d         t        |d         kD  r||d   xx   d	z  cc<   n||d   xx   d	z  cc<   |d   |d   |d   t        |d         |d   |d   t        |d         ||||d}|j!                  |       r  	 ddd       t"        j%                  d|        t'        || d       t'        || d       y# t        $ r d}d}Y Fw xY w# 1 sw Y   PxY w)zRun U test and T test.r/   r   wr	   rF   )
model_namerun_id_1experiment_1top1_match_rate_1run_id_2experiment_2top1_match_rate_2U_statisticU_pvalueT_statisticT_pvalue)
fieldnames)rx   r,   runsr4   top1_match_rate_per_runTFz	two-sided)use_continuityalternativeN)axis	equal_varg?rW   rx   re   z(U-Test and T-Test results are output to zU-TestzT-Test)rI   rH   
DictWriterwriteheaderlenrange
isinstancer   jsonloadsscipystatsmannwhitneyu
ValueError	ttest_indrR   writerowr<   rk   ru   )rK   output_csv_path
utest_wins
ttest_winsrN   r/   rL   column_nameswriterrequired_match_columnsnum_resultsiresult1ajresult2all_matchedcolumnbutest_statisticutest_pvaluettest_statisticttest_pvalues                          r%   run_significance_testr      s   JJX
6
6 
 
osB	/7
 LA!E$i{Q'A1gG'";<cBJJw'@AB561q5+q1q'"4Fv'&/9&+ 5 #g&?@#F

7+D#EFA 9:A(49KK4L4L1T{ 5M 51O\ 160E0EaQUae0E0f-+t0CW%6785IZA[;\\"78#45:5"78#45:5$&W%6785IZA[;\\"78#45:5"78#45:5 #*,"7 ' 1$+L$9).w7H/I)J ' 1$+L$9).w7H/I)J#2 ,#2 , $g 2 () 
0` KK:?:KLMz4*z4*E " (&*O#'L(a 
0	/s7   B.J9J&I.8C:J.I?	;J>I?	?JJraw_onnx_modelc                    t        j                  |       }t        |      }|j                         }|j                  j
                  d   j                  |v sJ ||j                  j
                  d   j                     }|j                  dk(  r.t        j                  d|j                          |j                  S t        j                  d|j                   d|j                          y )Nr   MatMulz#Found last MatMul node for logits: z-Failed to find MatMul node for logits. Found z	 of node )onnxloadr   output_name_to_nodegraphoutputrT   op_typer<   rk   warning)r   model
onnx_modelr   nodes        r%   get_last_matmul_node_namer   "  s    IIn%E5!J$88:;;a %%)<<<<u{{11!499:D||x9$))EFyy
NNB4<<.PYZ^ZcZcYdefr'   c                     | j                   }d| dj                         }| j                  r|j                  d       |ddd|gz  }|r|j	                  dg|       |S )N-m  -o --use_gpu -p fp16r   --io_block_listlogitsz--node_block_list--op_block_list)model_name_or_pathsplitr   r:   extend)r$   last_matmul_node_nameop_block_listr   
parameterss        r%   get_mixed_precision_parametersr   1  sz    ##Eug2399;J$$67	 J ,=}=>r'   FastGeluLayerNormalizationtaskc                     t        |||      }dj                  t        |            }|rd| d}nd| d}t               }|r	|d| dz   }| j	                  ||       y )N,Mixed precision baseline +  in FP32z=Mixed precision baseline (logits output and last MatMul node z	 in FP32)z ())r   r   rh   r   r@   )r   r$   r   r   r   op_block_list_strrT   env_varss           r%   run_candidater   C  sw     06K][J!67,->,?xHNOdNeeno,.H8*A&&HHZr'   c                     | j                   }d| dj                         }| j                  r|j                  d       | j                  r|j                  d       d| dj                         }| j                  r|j                  d       ||fS )Nr   z -o -p fp32r   r   r   )r   r   r   r:   r   )r$   r   fp32_baselinefp16_baselines       r%   get_baselinesr   X  s    ##E%,224M||[)$$9:% 56<<>M$$9:-''r'   c                    ddg}| j                  ||z   d       dg}| j                  ||z   d       | j                  ||z   dgz   |D cg c]  }| c}z   dgz   d       g }|}|D ]L  }	dg|D cg c]
  }||	k7  s	| c}z   }
| j                  ||z   |
z   d	|	 d
      }|s<|j                  |       N t        |d       }t        d|       yc c}w c c}w )z:Step 0 is to check which operator in FP16 causes most lossr   r   zFP16 except logitsz--keep_io_typeszGraph I/O FP32, Other FP16r   z--force_fp16_initializerszFP32 except weights in FP16zFP32 except z in FP16c                     | d   S )NrW   rD   )ys    r%   ra   z"run_tuning_step0.<locals>.<lambda>~  s
    !<M:Nr'   )rb   z<step 0: optimized operator causes the most loss in precisionN)r@   r:   minrf   )r   r   all_opsoptimized_opsfp32_logitsfp32_ioooptimized_ops_resultsop_listopr   r?   
min_results                r%   run_tuning_step0r   g  s   $h/KHH][(*>? !GHH]W$&BC 	HH#4"55G8LGqG8LLPkOll%
 G*+'.M'QQ"Wq'.MM-'1MA\RTQUU]C^_!((0	  *0NOJ	
H*U 9M /Ns   		C
/
C:Cc                 L    |D ]  }d|g}| j                  ||z   d| d       ! y)zKStep 1 is to figure out which optimized operator in FP32 could benefit mostr   r   r   N)r@   )r   mixed_precision_baseliner   r   r   s        r%   run_tuning_step1r     s6    *B/$}4)"X6	
 r'   c           	          g d}|D cg c]	  }||v s| }}|D ]D  }||vsg ||}| j                  g |d|dj                  dj                  |      |             F yc c}w )zAssumed that you have run step 0 and 1 to figure out that Logits FP32 and some operators shall be in FP32,
    This step will try add one more operator.
    )r   r   SkipLayerNormalizationr   z(Mixed precision baseline + {},{} in FP32r   N)r@   rm   r   )r   r   r   candidate_fp32_opsxfp32_opsr   r   s           r%   run_tuning_step2r     s     V-D-am1C-HDX+hOOMHHN*N,=NN:AA#((8BTVXY  Es
   	A#A#c           	      0  
 t        j                  d|j                  |j                  g       }t	        |      \  }}| j                  |d      }g }|rd|v r|d   r|d   j                  d      }nt        d      g 
|rd|v r|d   r|d   j                  d      
nt        d      |j                  st        j                  d	       y | j                  |d
       t        |d         }t        | ||g        
fd}|j                  r7t        | |
|       t        ||g       }	t!        | |	|       t#        | |	|       n't        | || |g d             t        | ||dg       t        | || |g d             t        | || |g d             y )Nonnx_models)
new_folderremove_existingzFP32 baselineoptimized_operatorsr   z!Failed to get optimized operators	operatorszFailed to get operatorsz5skip mixed precision since --use_gpu is not specifiedzFP16 baselineraw)r   c                 8    | D cg c]	  }|v s| c}S c c}w NrD   )r   r   r   s     r%   get_fp32_opsz run_parity.<locals>.get_fp32_ops  s!    0Qr"-Q000s   	)r   r   Addr   )r   r   r   r   )r   EmbedLayerNormalizationr   r   r   )r   get_onnx_pathsr   r   r   r@   r   rS   r   r<   rk   r   r   r   r   r   r   r   )r   r$   onnx_model_pathsr   r   r?   r   r   r   r   r   s             @r%   
run_parityr     s   !0000	 $1#6 M=XXm_5FM(F2?T8U45;;C@>??G;&(f[.A%++C0455 <<KLHH]O,56Fu6MN $32F1 xx}g}E#A$H]mo#p 7G7G!&'^_		
 	dD"7
|T "#fg	 "j
	r'   __main__rZ      i'  zNot enough test cases or runs to get stable results or test significance. Recommend test_cases >= 100, runs >= 20, test_cases * runs >= 10000.zOutput file zK existed. Please remove the file, or use either --skip_test or --overwrite.z6Remove existing file %s since --overwrite is specifiedzFailed to load csv z#Start running significance tests...z.csvz
.stats.csvr   ):r   rH   r7   r   loggingosr   scipy.statsr   benchmark_helperr   r   convert_to_onnxr   gpt2_helperr   r   r   r   	getLoggerr<   r&   r)   rO   rU   r\   ru   r   r   r   r   r   r   r   r   r   r   rA   r$   r   r,   r   r   pathexistsr   r   rS   rk   remover   r-   rK   r;   r=   r.   replacesummary_csvrD   r'   r%   <module>r	     s&    
    	   H   :  			2	@F :DO'T[+|c , 34	
*(V6
CZ CL zD		B$//DII2MPU2US	

 
ww~~dhh~~txxj(st  KKPRVRZRZ[BIIdhhdootyy$((;D>>4$T]]3
 KK56--''=K$,A 4  .t}}o>?||s   H /I ?I 