
    g?L                         d dl Z d dlZd dlZd dlZd dlZd dlZd dlZd dlmZ d dl	m
Z
mZ  e j                  e      Z G d d      Z G d d      Zy)    N)	Precision)
Gpt2Helper
Gpt2Inputsc                   N    e Zd ZddZd ZdefdZdefdZddZ	ddZ
d	 Zd
 Zy)
Gpt2Metricc                     |dkD  r|dk  sJ || _         || _        | d| | _        || _        d| _        d| _        d| _        d| _        d| _        d | _	        d | _
        i | _        y )N   d   z vs r   )baseline	treatmentnametop_ktop_1_errortop_k_errortotal_samplesmax_logits_diffmax_logits_diff_no_pastbatch_top1_errorbatch_topk_errorseq_len_latency)selftreatment_namebaseline_namer   s       e/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/gpt2/gpt2_tester.py__init__zGpt2Metric.__init__   s}    qyUc\))%'*+4?	
 ! !"#&'./$3737!    c                 D   | j                   | j                  k7  rt        d       t        d| j                   d| j                    d       | j                  dkD  rd| j                  z  | j                  z  }d| j
                  z  | j                  z  }t        d| j                   d| j                   d	|d
d| j                   d| j
                   d	|d
d       t        d       t        d| j                  d       t        d| j                  d       nt        d| j                   d       | j                  rt        d       d}d}t        | j                  j                               D ]  }t        j                  | j                  |         dz  }|dk(  rt        d| d|d
d       n"t        dd|z   dd|dz   z  dz
   d|d
d       ||t        | j                  |         z  z  }|t        | j                  |         z  } t        d||z  d
d       y y )Nz---zMetrics for z (baseline=z):r   g      Y@zTotal=z Top1Error=z (z.2fz%) TopzError=z%)zMax logits diffs:z	with past  = z.6fz	empty past = z (baseline):z/Past sequence length range and average latency:     @@	z:         	z msz	[   z, r	   z]:	zAverage Latency: )r   r   printr   r   r   r   r   r   r   sortedkeys
statisticsmeanlen)r   top_1_error_ratetop_k_error_ratetotalcountkeyaverages          r   r!   zGpt2Metric.print%   sW   ==DNN*%LL 0DMM?"MN!!A%#(4+;+;#;d>P>P#P #(4+;+;#;d>P>P#P T//0D<L<L;MRP`adOeeklplvlvkww}  C  O  O  ~P  PR  Sc  dg  Rh  hj  k %&OD$8$8#=>?OD$@$@#EFGL 0=>CDEEd22779:$//$*>*>s*CDvM!8Bse<}C@AC3xr!a.1*<)=T'#cRS3t';';C'@#AAAT11#677 ; %eemC%8<=  r   is_empty_pastc                     ||z
  j                         j                         }|rt        | j                  |      | _        |S t        | j                  |      | _        |S N)absmaxr   r   )r   baseline_logitstreatment_logitsr-   diffs        r   diff_logitszGpt2Metric.diff_logitsC   sZ    "22779==?+.t/K/KT+RD(  $'t';';T#BD r   
batch_sizec                     | xj                   |z  c_         t        j                  |dft        j                        | _        t        j                  |dft        j                        | _        y )Nr	   dtype)r   torchzerosboolr   r   )r   r6   s     r   start_batchzGpt2Metric.start_batchL   sI    j( %ZO5:: N %ZO5:: Nr   c                 H   | j                  |j                  |j                  d|       | j                  |j                  |j                  | j                  |       | j	                  |j
                  |j
                  |dk(        }|rt        d| j                   d|        y y )Nr	   r   zMax logits diffs of z: )
_eval_topktop_1_tokenstop_k_tokensr   r5   logitsr!   r   )r   r   r   past_seq_lenverbosemax_diffs         r   
eval_batchzGpt2Metric.eval_batchQ   s    --y/E/Eq'R--y/E/EtzzSZ[##HOOY5E5E|WXGXY(2hZ@A r   c                    t        j                  t        j                  ||            s|dk(  rR|rt        d| j                          | xj
                  t        j                  ||      j                         z  c_        y |rt        d| d| j                   d       | xj                  t        j                  ||      j                         j                  d      j                  d      dkD  z  c_        y y )Nr	   z!Generated tokens not matched for zTop z tokens not matched for z-. This will lead to wrong beam search results)dimr   )
r:   alleqr!   r   r   logical_notr   sum	unsqueeze)r   baseline_topktreatment_topkr   rD   s        r   r?   zGpt2Metric._eval_topkY   s    yy-@Az=dii[IJ%%-)P)\)\)^^%ug%=dii[Huv %%HH]N;GGIMMaPZZ_`Zadee% Br   c                     | xj                   | j                  j                         z  c_         | xj                  | j                  j                         z  c_        y r/   )r   r   rL   r   r   r   s    r   	end_batchzGpt2Metric.end_batchh   sB    D115577D115577r   c                     |dkD  r!t        t        j                  |            dz   nd}|| j                  vrg | j                  |<   | j                  |   j	                  |       y )Nr   r	   )intmathlog2r   append)r   rC   latencyr+   s       r   add_latencyzGpt2Metric.add_latencyl   sZ    2>2Bc$))L)*Q.d***(*D  %S!((1r   N)Torch   )T)__name__
__module____qualname__r   r!   r<   r5   rT   r=   rF   r?   rR   rY    r   r   r   r      s;    "><D Oc O
B82r   r   c            
           e Zd Z	 	 	 ddZdefdZd Zd Zd Ze	dd	       Z
e	d
        Ze	d        Ze	ej                  ddddddddf	d       Zy)
Gpt2TesterFr[   c                 (   |j                   d   | _        |j                   d   | _        || _        || _        || _        || _        |d u| _        |d u| _        g | _	        d| j                  |d||z  g}t        |      D ]p  }t        j                  |      j                  |rt        j                  nt        j                        }| j                  j!                  |j#                  |             r d | _        d | _        d | _        |	| _        |
| _        y )Nr   r	   r    )shaper6   input_lengthn_layer	input_idsposition_idsattention_maskhas_position_idshas_attention_maskpastranger:   emptytypefloat16float32rW   torB   r@   rA   r   top_k_required_order)r   rf   rg   rh   num_attention_headshidden_size	num_layerdeviceis_fp16r   rr   
past_shape_i
empty_pasts                 r   r   zGpt2Tester.__init__t   s    $//!,%OOA. "(, ,D 8"0"< 	OO..

 	"BZ055wemmTYTaTabJIIZ]]623 #   
$8!r   returnc                 n    t        | j                  | j                  | j                  | j                        S r/   )r   rf   rg   rh   rk   rQ   s    r   
get_inputszGpt2Tester.get_inputs   s*    $..$*;*;T=P=PRVR[R[\\r   c           	      0   ddl m t        j                  j	                  |dt        |      z         }t        j                  j                  |      rt        d| d       y t        j                  |d       fd}g } ||| j                  d	       | j                  r ||| j                  d
       | j                  r ||| j                  d       t        | j                        D ]%  } ||| j                   |   dt        |      z          ' t#        |      D ][  \  }}	t%        t        j                  j	                  |d| d      d      5 }
|
j'                  |	j)                                d d d        ] |j+                         D cg c]  }|j,                   }}t#        |      D ]  \  }}j/                  t1        |   t2        j4                        r||   n.||   j7                         j9                         j3                               }	t%        t        j                  j	                  |d| d      d      5 }
|
j'                  |	j)                                d d d         t        d|        y # 1 sw Y   gxY wc c}w # 1 sw Y   xY w)Nr   )numpy_helpertest_data_set_z
Directory z existed. Skip saving test dataT)exist_okc                     | j                  j                  |j                         j                         j	                         |             y r/   )rW   
from_arrayclonecpunumpy)input_tensorstorch_tensorr   r   s      r   
add_tensorz-Gpt2Tester.save_test_data.<locals>.add_tensor   s:      !8!89K9K9M9Q9Q9S9Y9Y9[]a!bcr   rf   rg   rh   past_input_z.pbwboutput_zTest data saved to directory )onnxr   ospathjoinstrexistsr!   makedirsrf   ri   rg   rj   rh   rl   re   rk   	enumerateopenwriteSerializeToStringget_outputsr   r   
isinstancer   ndarrayr   r   )r   sessionoutputsave_test_data_dirtest_case_idr   r   r   itensorfoutput_names_namer   s                @r   save_test_datazGpt2Tester.save_test_data   s!   %ww||.0@3|CT0TU77>>$Jtf$CDE
D4(	d =$..+>  }d&7&7H""}d&9&9;KLt||$A}diilGc!f4DE % #=1IAvbggll46!C94@A0023 A@ 2 3:2E2E2GH2G2GH!,/HAu!,,'q	5==Aq	vayGXG\G\G^GdGdGfF bggll471#S)9:DAQ0023 BA	 0 	-dV45 A@ I
 BAs    I:J J:J	J	c                    t        |d   t        j                        rt        j                  |d         n.|d   j                         j                         j                         | _        t        j                  | j                        | _        t        j                  | j                  | j                  | j                        | _        | j                  j                         j                         j                  | j                   dg      j#                  |      | _        | j&                  rct        j(                  | j*                  |z   dz
  g      j-                  d      j/                  | j                   d      j#                  |      | _        | j2                  rnt        j4                  | j6                  t        j8                  | j                   dg      j;                  | j6                        gd      j#                  |      | _        g | _        t        |d   t>              rtA        |d         | _        ytC        | jD                        D ]  }t        ||dz      t        j                        rt        j                  ||dz            n#||dz      j                         j                         }| j<                  jG                  |j#                  |              y)z7
        Update the inputs for next inference.
        r   r	   N)$r   r   r   r:   
from_numpyr   detachr   rB   ra   predict_next_tokenr@   r   rr   rA   reshaper6   rq   rf   ri   r   rd   rM   repeatrg   rj   catrh   onestype_asrk   tuplelistrl   re   rW   )r   r   steprv   r   past_is         r   updatezGpt2Tester.update   s8   
 ,6fQi+OEVAY'U[\]U^UdUdUfUmUmUoUsUsUu 	 '99$++F&99$++tzzSWSlSlm**00299;CCT__VWDXY\\]cd  d//$6:;<FFqIPPQUQ`Q`bcdgghno  """'))''JJ34<<T=P=PQ # bj  	fQi'VAYDI4<<( "&Q-? $$VAE]3A,,.557 
 		  6!23 )r   c                 V   t        d       | j                  H| j                  |j                  z
  j                         j                         }|dkD  rt        d|        t	        j
                  | j                  |j                  k(        s!t        d| j                  |j                         | j                  rMt	        j
                  | j                  |j                  k(        s!t        d| j                  |j                         | j                  rMt	        j
                  | j                  |j                  k(        s!t        d| j                  |j                         t        | j                        t        |j                        k(  sJ t        | j                        D ]  \  }}|j                  |j                  |   j                  k(  sJ |j                         dkD  sB||j                  |   z
  j                         j                         }|dkD  svt        d	| d
|         y)z3
        Compare inputs and logits output.
        zstart diff...Ng-C6?z$Max logits difference is too large: zInput_ids is differentzposition_ids is differentzattention_mask is differentr   zmax_past_diff[z]=)r!   rB   r0   r1   r:   rI   rf   ri   rg   rj   rh   r&   rk   r   rc   nelement)r   r   max_io_diffr   r   max_past_diffs         r   r4   zGpt2Tester.diff   s   
 	o;;";;8==?CCEKT!<[MJKyy8+=+==>*DNNH<N<NO  99T..(2G2GGH/%%)) ""99T00H4K4KKL1''++ 499~X]]!3333"499-IAv<<8==#3#9#9999 1$!'(--*:!: ? ? A E E G 4'N1#R?@ .r   c                     | dddf   }|dk(  rt        j                  |dd      }|S t        j                  |dd      ddd|f   }|s|j                         \  }}|S |S )z4
        Get top k topkens based on logits.
        Nr	   T)
descending)r:   argmaxargsortsort)rB   r   required_orderlastTokenLogitsgeneratedTokenstopksorted_topk_s           r   r   zGpt2Tester.predict_next_token  sl     !B-A:#ll?AtDO""=="Fq&5&yQD!!%Q""Kr   c                    g }t        |      D ]  }t        | |dz      t        j                        rt	        j
                  | |dz            n| |dz      }t        ||dz      t        j                        rt	        j
                  ||dz            n||dz      }||z
  j                         j                         }|j                  |        t        d|        y)zO
        Compare the present outputs of two outputs from ONNX Runtime.
        r	   zpresent_diff_max=N)
rl   r   r   r   r:   r   r0   r1   rW   r!   )onnx_outputonnx_io_outputre   present_diff_maxr   onnx_present_ionnx_io_present_irE   s           r   diff_presentzGpt2Tester.diff_present+  s    
 wA k!a%0%--@   QU!34 Q'  nQU3U]]C   A!67#AE* 
 ')::??AEEGH##H-   	!"2!345r   c                 H    ddl m}  ||       }ddlm} |j                  |k(  S )z>
        Returns True if the ONNX model is quantized.
        r   )load)__producer__)r   r   !onnxruntime.quantization.quantizer   producer_name)onnx_model_pathr   modelquantize_producers       r   is_quantized_onnx_modelz"Gpt2Tester.is_quantized_onnx_model@  s&    
 	_%W""&777r   Gpt2LMHeadModelT   r   .c                 
   t        d| d| d| dt        |       d|	 d       |j                  j                  }|j                  j                  }|j                  j
                  }|j                  j                  }d}|t        j                  k(  }|r!d| j                         d   j                  v sJ |j                         j                  |       t        j                  d	d
d|j                  |      }t        j                  |||      }d}|t        j                   k(  rdnd}t#        |||      }t#        |||      }t#        |dz   ||      }t%        |      D ]  \  }}|	dkD  r||	k(  r n|dz  dk(  rt        |        |d   }|j'                  dd      }|j'                  dd      }t)        |||||||||| 
      }t)        |||||||||| 
      } t)        |||||||d|| 
      }!|!j*                  }"|j-                  |"       |j-                  |"       t/        j0                         5  t/        j2                  |"t.        j4                        }#t7        |      D ]  }$t9        |j:                  j=                               d   }%t9        |j>                  d   j=                               d   }&tA        jB                         }'t        jD                  ||!jG                               }(|jI                  |&tA        jB                         |'z
         |!jK                  |(|$|       t        jL                  | |jG                         d      \  })}*|jI                  |&|*dz         |jK                  |)|$|       t        j                  |"|&|%|j                  |      }+t        jN                  ||+       t        jP                  | | jG                         ||+ddd      \  },}*|jI                  |&|*dz         ||k  r| jS                  | |,||       |dz  }| jK                  |,|$|       |
ru|jU                  |        t(        jW                  |)|,|       t        d        t        d!|!jX                         t        d"|jX                         t        d#| jX                         |j[                  |!||&|
$       |j[                  |!| |&|
$       |#|!jX                  |k(  j]                         z  }#t/        j^                  |#      s n ddd       |ja                          |ja                           |j                          |j                          |j                          y# 1 sw Y   ]xY w)%z
        Test Generation using greedy beam search (without sampling) to compare PyTorch and ONNX model.
        It will print top 1 and top k errors on the given test inputs.
        zstart test generation: (top_k=z top_k_no_order=z max_steps=z test_inputs=z max_inputs=)r   ro             )r6   past_sequence_lengthsequence_lengthconfigmodel_class)
is_float16rZ   zQuantized OnnxOnnxz with IO Binding
   rf   rg   Nrh   Fr8   r	      )
total_runsr   )r   T)r   return_numpyinclude_copy_output_latencyzTop 1 tokens:z	Torchz	ONNXz	ONNX with IO binding)rD   )1r!   r&   r   re   n_headn_embdeos_token_idr   FLOAT16r   rn   evalrq   r   get_output_shapesget_output_buffersINT8r   r   getra   r6   r=   r:   no_gradr;   r<   rl   r   rf   sizerk   timeitdefault_timerpytorch_inferencer}   rY   r   onnxruntime_inferenceauto_increase_buffer_size$onnxruntime_inference_with_binded_ior   r4   r   r@   rF   anyrI   rR   )-r   r   rv   test_inputs	precisionr   r   top_k_no_order	max_steps
max_inputsrD   r   r   re   r   r   r   test_data_savedr   init_output_shapesoutput_buffersr   r   torch_metriconnx_metriconnx_io_metricr   inputsrf   rg   rh   onnx_runneronnx_io_runnertorch_runnerr6   doner   seq_lenrC   
start_timepytorch_outputr   avg_latency_msoutput_shapesr   s-                                                r   test_generationzGpt2Tester.test_generationL  sb   ( 	,UG3CNCSS^_h^iivwz  |G  xH  wI  IU  V`  Ua  ab  c	
 ,,&&$$$$||00)"3"33
 3 3 5a 8 = ==== 	

 (99!$<<#
 $667I6^hi-6)..-H)f!-F F#N5G$GX]^";/IAvA~!z/2v{{+I!::nd;L#ZZ(8$?N$""K (""N &""L &00J##J/&&z2{{:UZZ@!),D";#8#8#=#=#?@CG#'(8(8(;(@(@(B#CA#FL!'!5!5!7J%/%A%A%I`I`Ib%cN ,,\6;O;O;QT^;^_ ''fE2<2R2R!7!7!9a3/K  ++L.6:QR&&{D&A$.$@$@"$$/%M 88W
 #GG&113&%#$%*48&& #..|^f=TU&7&55g~Oacrs'1,")).$G#((8"//^WUo.i)B)BCh(@(@A68S8ST**<l\c*d"--lNLbi-j<#<#<#L"Q"Q"SSDyyu - !| !!#$$&c 0f 	G !s   0K
U<UU$	N)Fr[   F)r	   F)r\   r]   r^   r   r   r}   r   r   r4   staticmethodr   r   r   r   FLOAT32r  r_   r   r   ra   ra   s   s     ")9V]J ]%6N&4P%AN  $ 6 6( 	8 	8  ##%h hr   ra   )loggingrU   r   r$   r   r   r:   benchmark_helperr   gpt2_helperr   r   	getLoggerr\   loggerr   ra   r_   r   r   <module>r     sM      	     & .			8	$[2 [2|B Br   