
    gL                          d dl Z d dlZd dlZd dlmZ ddlmZmZmZm	Z	m
Z
 ddlmZ ddlmZ  G d d	e      Z	  G d
 de      Z	  G d de      Z G d de      Zy)    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypefind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                   $     e Zd Z fdZd Z xZS )	QOpMatMulc                 &    t         |   ||       y Nsuper__init__selfonnx_quantizer	onnx_node	__class__s      ^/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/operators/matmul.pyr   zQOpMatMul.__init__       3    c                    | j                   j                  | j                        s.t        j                  d| j                  j
                   d       y| j                   j                  | j                  j                  d         s`| j                   j                  | j                  j                  d         s.t        j                  d| j                  j
                   d       y| j                   j                  rw| j                   j                  | j                  j                  d         sEt        j                  d| j                   j                   d| j                  j
                   d       yy	)
NzIgnore MatMul ]Fr
   r   z&Ignore MatMul due to non float inputs z%Ignore MatMul due to non constant B: [T)	quantizershould_quantize_nodenodeloggingdebugnameis_float_tensorinputinfoq_matmul_const_b_onlyfind_initializer_in_pathgraph_scope)r   s    r   should_quantizezQOpMatMul.should_quantize   s   ~~22499=MMN499>>*:!<=..tyyq/AB..tyyq/ABLLA$))..AQQRST >>//>>::499??1;MNDT^^E_E_D``abfbkbkbpbpaqqrstr   )__name__
__module____qualname__r   r+   __classcell__r   s   @r   r   r      s    4r   r   c                   $     e Zd Z fdZd Z xZS )MatMulIntegerc                 &    t         |   ||       y r   r   r   s      r   r   zMatMulInteger.__init__)   r   r   c                    | j                   }|j                  dk(  sJ | j                  j                  |dg      \  }}}}| j                  j	                  |dgdd      \  }}}}	|j                  |       |j                  |       |j                  |       |j                  |	       |j                  d   dz   }
|j                  r|j                  dz   nd}t        j                  j                  d	||z   |
g|      }|j                  |       |
d
z   }| j                  j                  |j                  d   d      }t        j                  j                  d|
g|g|
dz   |      }|j                  |       t        |      dk(  sJ |r|dz   n|d   dz   |d   z   dz   }t        || j                  j                        }|!t!        ||dz   |      }|j                  |       |j                  d   }d}|r|dz   }|j                  t!        ||g|j                  d   |             | j                  xj                  |z  c_        y )NMatMulr   r
   Treduce_rangeop_level_per_channel_output_quantized_quant r2   _cast_output)	mandatoryCast_cast)tor   _scales_mul__mulz:0_output_scale_mul)r!   op_typer   quantize_activationquantize_weightextendoutputr$   onnxhelper	make_nodeappendget_tensor_typelenr   	new_nodesr	   )r   r!   quantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightmatmul_integer_outputmatmul_integer_namematmul_integer_nodecast_op_outputotype	cast_nodescales_mul_opscales_mul_nodescales_mul_op_outputoutput_scale_mul_ops                       r   quantizezMatMulInteger.quantize,   sl   yy||x''' NN..taS9	
! NN**4!4^b*c	
(#$$%AB 78-.\" $A1D D6:iidii(2R"kk33!$44"#	
 	() /?..t{{1~.NKK))"#!G+ * 
	 	Y ;1$$$ #  -/Q#%A6? 	 '}dnn6N6NO"*;8Lm\OLL).55a8 !"58K"K!56A#	
 	  E) r   r,   r-   r.   r   rc   r/   r0   s   @r   r2   r2   (   s    4G*r   r2   c                   (     e Zd Z fdZ fdZ xZS )QLinearMatMulc                 &    t         |   ||       y r   r   r   s      r   r   zQLinearMatMul.__init__|   r   r   c                    | j                   }|j                  dk(  sJ | j                  j                  |dg      \  }}}}| j                  j	                  |dgdd      \  }}}}	|j                  |       |j                  |       |j                  |       |j                  |	       | j                  j                  |j                  d         \  }
}}}}|
r|t        | %         S |j                  d   t        z   }|j                  r|j                  dz   nd}g }|j                  |d          |j                  |d          |j                  |d          |j                  |d          |j                  |d          |j                  |d          |j                  |       |j                  |       | j                  j                  t        j                  j                   t        j                  j"                  t        j                  j$                  t        j                  j&                  hv rdnd}t(        j*                  j-                  d	||g||
      }|j                  |       t/        |j                  d   |||t0        j2                        }|| j                  j4                  |j                  d   <   | j                  xj6                  |z  c_        y )Nr5   r   r
   Tr6   r:   r;   zcom.microsoftrf   )domain)r!   rE   r   rF   rG   rH   _get_quantization_paramsrI   r   rc   r   r$   rM   weight_qType
onnx_protoTensorProtoFLOAT8E4M3FNFLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZrJ   rK   rL   r   r   Inputquantized_value_maprP   )r   r!   rQ   rR   rS   rT   rU   rV   rW   rX   
data_foundoutput_scale_nameoutput_zp_namerB   qlinear_matmul_outputqlinear_matmul_nameqlinear_matmul_inputsri   qlinear_matmul_nodeq_outputr   s                       r   rc   zQLinearMatMul.quantize   s   yy||x''' NN..taS9	
! NN**4!4^b*c	
(#$$%AB 78-.\" NN33DKKNC	
2:7#%% $A1I I6:iidii(2R "$$%:1%=>$$[^4$$%5a%89$$%:1%=>$$[^4$$%5a%89$$%67$$^4 ~~**&&33&&55&&11&&55	   	 #kk33!"# 4 
 	() "KKN!$$
 >F**4;;q>:  E) r   rd   r0   s   @r   rf   rf   {   s    4N* N*r   rf   c                   $     e Zd Z fdZd Z xZS )	QDQMatMulc                 &    t         |   ||       y r   r   r   s      r   r   zQDQMatMul.__init__   r   r   c                 0   | j                   }|j                  dk(  sJ | j                  r|j                  }n*t	        j
                  |j                  |j                        }|D ]  }t        || j                  j                  j                               rf| j                  j                  |d|j                        \  }}|r| j                  j                  ||       {| j                  j                  |       | j                  j                  |        y )Nr5   r
   )default_axisrE   )r!   rE   disable_qdq_for_node_outputr&   	itertoolschainrI   r   r   modelinitializeris_tensor_per_channel"quantize_weight_tensor_per_channelquantize_weight_tensorquantize_activation_tensor)r   r!   nodes_to_iteratetensor_nameis_per_channelchannel_axiss         r   rc   zQDQMatMul.quantize   s    yy||x'''++#zz(tzz4;;G+KK)=)=)I)I)KL/3~~/S/Sa 0T 0, "NNEEkS_`NN99+F99+F ,r   rd   r0   s   @r   r}   r}      s    4Gr   r}   )r   r"   rJ   r   rl   quant_utilsr   r   r   r   r	   base_operatorr   qdq_base_operatorr   r   r2   rf   r}    r   r   <module>r      s`       & r r , .! .
K*I K*\
R*I R*jG Gr   