
    g&                         d dl Zd dlZd dlmZ ddlmZmZmZm	Z	m
Z
mZ ddlmZ ddlmZ  G d d	e      Z G d
 de      Z G d de      Zy)    N)onnx_pb   )TENSOR_NAME_QUANT_SUFFIXQuantizedValueQuantizedValueTypeattribute_to_kwargfind_by_nameget_mul_node   )QuantOperatorBase)QDQOperatorBasec                   *     e Zd Z fdZd Zd Z xZS )ConvIntegerc                 &    t         |   ||       y Nsuper__init__selfonnx_quantizer	onnx_node	__class__s      \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/operators/conv.pyr   zConvInteger.__init__       3    c                 6   | j                   }| j                  j                  }t        |j                  d   |j                               }|t        d|j                  d    d      |j                  d   }|j                  d   }|dz   }|dz   }	t        j                  t        |j                        t        j                  	      }
d
|
d<   t        j                  j                  |t         j"                  j$                  t        |j                        g|
      }|j'                  |       t        j                  j)                  d||g|	g      }|j+                  |       t        j                  j)                  d||	g|g|dz         }|j+                  |       y)a  
        Given a node, this function handles bias add by adding a "reshape" node on bias and an "add" node
            parameter nodes: new nodes would be appended into nodes
            parameter node: current node (Conv)
            parameter scaled_output: output of quant conv without bias
            parameter output: output of Conv
            parameter bias_name: bias of Conv
            return: the name of output
        r   Nz	Expected z to be an initializerr   r   _bias_reshape_shape_bias_reshape_output)dtypeReshapeAdd	_bias_add)node	quantizermodelr	   inputinitializer
ValueErroroutputnponeslendimsint64onnxhelpermake_tensor
onnx_protoTensorProtoINT64add_initializer	make_nodeappend)r   nodesscaled_outputr%   r'   weightr+   reshape_input_datareshape_input_shapereshape_outputshape
init_shapereshape_nodeadd_nodes                 r   add_biaszConvInteger.add_bias   sb    yy$$djjmU->->-@A>yA7LMNN Q!ZZ]$'<<"88V[[)"((;a[[,,!7!7!=!=FKK@P?QSX

 	j){{,,Y9KM`8adrcst\" ;;((0ORXQY[ado[opXr   c                    | j                   }|j                  dk(  sJ | j                  j                  |dg      \  }}}}| j                  j	                  |dg| j                  j
                        \  }}}}	|j                  |       |j                  |       |j                  |       |j                  |	       |j                  d   dz   }
|j                  r|j                  dz   nd}i }|j                  D ]  }|j                  t        |              t        j                  j                  d||z   |
g|fi |}|j                  |       | j                  j!                  |j                  d   d	
      }|
dz   }t        j                  j                  d|
g|g|
dz   |      }|j                  |       t#        |      dk(  sJ |r|dz   }n|d   dz   |d   z   dz   }t%        || j                  j&                        }|!t)        ||dz   |      }|j                  |       |j                  d   }t#        |j*                        dk(  }|s|j                  d   n|j                  d   dz   }|r|dz   nd}|j                  t)        ||g||             |r| j-                  ||       | j                  xj&                  |z  c_        y )NConvr   r   reduce_range_output_quantized_quant r   T)	mandatory_cast_outputCast_cast)tor   _scales_mul__mulz:0   quant_scaled_output_output_scale_mul)r%   op_typer&   quantize_activationquantize_weightrH   extendr+   name	attributeupdater   r1   r2   r8   r9   get_tensor_typer.   r	   	new_nodesr
   r(   rD   )r   r%   quantized_input_nameszero_point_namesscale_namesr:   quantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightconv_integer_outputconv_integer_namekwargsr\   conv_integer_node	onnx_typecast_op_output	cast_nodescales_mul_opscales_mul_nodescales_mul_op_outputhas_biasscaled_output_nameoutput_scale_mul_ops                           r   quantizezConvInteger.quantize:   s   yy||v%%% NN..taS9	
! NN**4!4>>C^C^*_	
(#$$%AB 78-.\""kk!n/BB48IIDII02IMM,Y78 ( KK1103CCFYEZ\m
qw
 	&' NN224;;q>T2R	,~=KK)) !') * 
	 	Y ;1$$$-=M'NS0;q>AFJM&}dnn6N6NO"*;8Lm\OLL).55a8tzz?a'3;T[[^QRgAg J[/2EE`b!56"#	
 MM%!34  E) r   )__name__
__module____qualname__r   rD   rt   __classcell__r   s   @r   r   r      s    4#JL*r   r   c                   (     e Zd Z fdZ fdZ xZS )QLinearConvc                 &    t         |   ||       y r   r   r   s      r   r   zQLinearConv.__init__   r   r   c                 d   | j                   }|j                  dk(  sJ | j                  j                  |j                  d         \  }}}}}| j                  j                  |j                  d         r| j                  j                         r| j                  j                  |dg      \  }}}}	| j                  j                  |j                  d   t        j                  j                  d      }
|j                  |
d          |j                  |
d          |j                  |
d          n| j                  j                  |dg      \  }}}}	| j                  j                  |dg| j                  j                        \  }}}}|j!                  |       |j!                  |       |j!                  |       |	j!                  |       |r|t"        | I         S d}d}t'        |j                        dk(  r| j                  j(                  t        j                  j*                  k(  rt-        d	      | j                  j/                  |j                  d   |j                  d   |j                  d         }d
}|j                  d   t0        z   }|j2                  r|j2                  dz   nd}i }|j4                  D ]  }|j7                  t9        |              g }|j                  |d          |j                  |d          |j                  |d          |j                  |d          |j                  |d          |j                  |d          |j                  |       |j                  |       |r|j                  |       t;        j<                  j>                  d||g|fi |}|	j                  |       tA        |j                  d   |||tB        jD                        }|| j                  jF                  |j                  d   <   | j                  xjH                  |	z  c_$        y )NrF   r   r   r   rG   rK   FrT   z@Quantization to FLOAT8E4M3FN for operator Conv is not supported.TrJ   r{   )%r%   rW   r&   _get_quantization_paramsr+   is_input_a_initializerr(   is_per_channelrX   quantize_weight_per_channelr4   r5   INT8r9   rY   rH   rZ   r   rt   r.   weight_qTypeFLOAT8E4M3FNRuntimeErrorquantize_bias_staticr   r[   r\   r]   r   r1   r2   r8   r   r   Inputquantized_value_mapr_   )r   r%   
data_foundoutput_scale_nameoutput_zp_namerR   r`   ra   rb   r:   quant_weight_tuplerc   rd   re   rf   quantized_bias_namebias_presentqlinear_conv_outputqlinear_conv_nameri   r\   qlinear_conv_inputsqlinear_conv_nodeq_outputr   s                           r   rt   zQLinearConv.quantize   s   yy||v%%% NN33DKKNC	
 >>00A?DNNDaDaDc 224!=% !%!K!K

1z55::A" "(();A)>?##$6q$9:1!45 224!=%  ..taSt~~GbGb.c,'"!(()EF##$;<12LL&2:7#%% tzz?a~~**j.D.D.Q.QQ"#eff"&.."E"EdjjQRmUYU_U_`aUbdhdndnopdq"rL"kk!n/GG48IIDII02IMM,Y78 ( ""#8#;<"";q>2""#3A#67""#8#;<"";q>2""#3A#67 	""#45"">2&&':; KK11.1D0EGX
\b
 	&' "KKN$$
 >F**4;;q>:  E) r   ru   rv   rw   r   rt   rx   ry   s   @r   r{   r{      s    4]* ]*r   r{   c                   $     e Zd Z fdZd Z xZS )QDQConvc                 &    t         |   ||       y r   r   r   s      r   r   zQDQConv.__init__   r   r   c                    | j                   }|j                  dk(  s|j                  dk(  sJ | j                  j                  |j                  d          | j
                  s(| j                  j                  |j                  d          | j                  j                  |j                  d   |j                  dk(  rdnd      \  }}|r*| j                  j                  |j                  d   |       n(| j                  j                  |j                  d          t        |j                        dk(  rP| j                  j                  |j                  |j                  d   |j                  d   |j                  d          y y )NrF   ConvTransposer   r   )default_axisrT   r   )r%   rW   r&   quantize_activation_tensorr(   disable_qdq_for_node_outputr+   is_tensor_per_channel"quantize_weight_tensor_per_channelquantize_weight_tensorr.   quantize_bias_tensorr[   )r   r%   is_weight_per_channelweight_axiss       r   rt   zQDQConv.quantize   s)   yy||v%)HHH11$**Q-@//NN55dkk!nE-1^^-Q-QJJqMT\\V-C .R .
*{ !NN==djjm[YNN11$**Q-@tzz?aNN//		4::a=$**UV-Y]YcYcdeYfg  r   r   ry   s   @r   r   r      s    4hr   r   )numpyr,   r1   r   r4   quant_utilsr   r   r   r   r	   r
   base_operatorr   qdq_base_operatorr   r   r{   r    r   r   <module>r      sN      &  - .u*# u*pa*# a*Hho hr   