
    g                         d dl Z d dlZd dlZddlmZ ddlmZ ddlmZmZm	Z	m
Z
mZ ddlmZ ddlmZ dd	lmZ d
 Zd Zd Z G d de      Z G d de      Zy)    N   )find_by_name)get_mul_node)TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypeattribute_to_kwarg	ms_domain   )QuantOperatorBase)	QOpMatMul)QDQOperatorBasec                     | j                   D cg c]  }|j                  dk(  s| }}t        |      r%t        j                  j                  |d         dkD  S yc c}w )NtransBr   F	attributenamelenonnxhelperget_attribute_value)	gemm_nodeattrtransB_attributes      \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/operators/gemm.pyis_B_transposedr      s\    )2)<)<V)<		X@U)<V
{{../?/BCaGG	 Ws
   AAc                     | j                   D cg c]  }|j                  dk(  s| }}t        |      r"t        j                  j                  |d         S yc c}w )Nbetar         ?r   r   r   beta_attributes      r   get_betar"      sT    '0':':R':tdii6>Qd':NR
>{{..~a/@AA	 Ss
   AAc                     | j                   D cg c]  }|j                  dk(  s| }}t        |      r
d|d   _        yc c}w )Nr   r   r   )r   r   r   fr    s      r   set_default_betar%      sG    '0':':R':tdii6>Qd':NR
>!q	 Ss
   A A c                   (     e Zd Z fdZ fdZ xZS )QLinearGemmc                 &    t         |   ||       y Nsuper__init__selfonnx_quantizer	onnx_node	__class__s      r   r,   zQLinearGemm.__init__'       3    c           	      &   | j                   }|j                  dk(  sJ | j                  j                  |j                  d         \  }}}}}| j                  j                  |j                  d         r| j                  j                         r| j                  j                  |dg      \  }}}}	| j                  j                  |j                  d   | j                  j                  t        |      rdnd      }
|j                  |
d          |j                  |
d          |j                  |
d          n| j                  j                  |dg      \  }}}}	| j                  j                  |dg| j                  j                        \  }}}}|j                  |       |j                  |       |j                  |       |	j                  |       |r|t         | E         S d}t%        |j                        dk(  r| j                  j                  |j                  d         st         | E         S | j                  j'                  |j                  d   |j                  d   |j                  d   t)        | j                               }|j                  d   t*        z   }|j,                  r|j,                  dz   nd}i }|j.                  D ],  }|j,                  d	k7  s|j1                  t3        |             . t4        |d
<   g }t7        d      D ]  }|j                  ||   ||   ||   g       ! |j                  |||g       t9        j:                  j<                  d||g|fi |}|	j                  |       t?        |j                  d   |||t@        jB                  |j                  | j                  j                        }|| j                  jD                  |j                  d   <   | j                  xjF                  |	z  c_#        y )NGemmr   r   r   )reduce_range    _quantr   domainQGemm)	node_type
node_qtype)$nodeop_type	quantizer_get_quantization_paramsoutputis_input_a_initializerinputis_per_channelquantize_activationquantize_weight_per_channelweight_qTyper   appendquantize_weightr6   extendr+   quantizer   quantize_bias_staticr"   r   r   r   updater	   r
   ranger   r   	make_noder   r   Inputquantized_value_map	new_nodes)r.   r>   
data_foundoutput_scale_nameoutput_zp_name_quantized_input_nameszero_point_namesscale_namesnodesquant_weight_tuplequantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightquantized_bias_nameqgemm_output
qgemm_namekwargsr   qgemm_inputsi
qgemm_nodeq_outputr1   s                           r   rL   zQLinearGemm.quantize*   s   yy||v%%% NN33DKKNC	
 >>00A?DNNDaDaDc 224!=% !%!K!K

1++$T*"
 "(();A)>?##$6q$9:1!45 224!=%  ..taSt~~GbGb.c,'"!(()EF##$;<12LL&2:7#%% tzz?a>>88AGw')) #'.."E"E

1tzz!}djjmXdii=P# {{1~(@@-1YYTYY)B
I~~'0;< ( %x qA!6q!9;q>K[\]K^ _`  	02C^TU[[**7L<.R\g`fg
Z  "KKN$$ll~~22
 >F**4;;q>:  E) r3   __name__
__module____qualname__r,   rL   __classcell__r1   s   @r   r'   r'   &   s    4\* \*r3   r'   c                   $     e Zd Z fdZd Z xZS )QDQGemmc                 &    t         |   ||       y r)   r*   r-   s      r   r,   zQDQGemm.__init__   r2   r3   c           	         | j                   }|j                  dk(  sJ | j                  j                  |j                  d          | j
                  s(| j                  j                  |j                  d          | j                  j                  |j                  d   t        |      rdnd      \  }}|r*| j                  j                  |j                  d   |       n(| j                  j                  |j                  d          t        |j                        dk(  r| j                  j                  |j                  d         ry| j                  j                  |j                  |j                  d   |j                  d   |j                  d   t        | j                                t!        | j                          y t#        j$                  d| j                   j                   d       y y )	Nr5   r   r   )default_axisr8   r   zBias of Gemm node 'zC' is not constant. Please exclude this node for better performance.)r>   r?   r@   quantize_activation_tensorrD   disable_qdq_for_node_outputrB   is_tensor_per_channelr   "quantize_weight_tensor_per_channelquantize_weight_tensorr   rC   quantize_bias_tensorr   r"   r%   loggingwarning)r.   r>   is_weight_per_channelweight_axiss       r   rL   zQDQGemm.quantize   so   yy||v%%%11$**Q-@//NN55dkk!nE-1^^-Q-QJJqM_T-B .R .
*{ !NN==djjm[YNN11$**Q-@tzz?a~~44TZZ]C33IItzz!}djjmTZZ]HUYU^U^L_ !+)$))..)99|}  r3   ri   rn   s   @r   rp   rp      s    4r3   rp   )rz   numpynpr   quant_utilsr   r   r   r   r   r	   r
   base_operatorr   matmulr   qdq_base_operatorr   r   r"   r%   r'   rp    r3   r   <module>r      sM       & & u u ,  .`*) `*Fo r3   