
    g                         d dl Z d dlZd dlZd dlZd dlmZ ddlmZm	Z	 ddl
mZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZmZ ddlm Z   G d	 d
e      Z!y)    N)onnx_pb   )BaseQuantizerQuantizationParams)
TensorData)	ONNXModel)TENSOR_NAME_QUANT_SUFFIXQuantizationModeQuantizedValueQuantizedValueType__producer____version__add_infer_metadataattribute_to_kwargcompute_scale_zpcompute_scale_zp_float8find_by_nameget_qmin_qmax_for_qTypeget_qrange_for_qType	ms_domain&save_and_reload_model_with_shape_infertensor_proto_to_array)CreateOpQuantizerc                       e Zd Z	 ddZd Zd Zd Zd Zd Zd Z	d	 Z
dd
Zd Zd Zd Zd ZddZ	 ddZd Zd dZd ZddZ	 	 	 	 d!dZ	 	 	 	 	 d"dZd#dZ	 	 d$dZd Zd Zd Zy)%ONNXQuantizerNc                 <   t        j                  | |||||||	|
||       |s| j                  j                          t	        | j                  j                        }|j
                  j                  D ci c]  }|j                  | c}| _        | j                  j                  |j
                  j                  D ci c]  }|j                  | c}       | j                  j                  |j
                  j                  D ci c]  }|j                  | c}       t        |      | _        || _        || _        | j                  dkD  | _        d| j"                  v xr | j"                  d   | _        g | _        d| _        i | _        | j*                  j                  |j
                  j                  D ci c]  }|j                  d c}       | j*                  j                  |j
                  j                  D ci c]  }|j                  d c}       | j                  j                  j
                  j,                  D ]6  }| j*                  j                  |j                  D ci c]  }|d c}       8 | j                  t.        vrt1        d| j                         | j3                         | _        d| _        d| _        d| _        d	| _        i | _        | j                  jA                         | _!        y c c}w c c}w c c}w c c}w c c}w c c}w )
N
   MatMulConstBOnly/r   zunsupported quantization mode fixed_quantization_range_uint8fixed_quantization_range_int8
fixed_zerofixed_zero_zp)"r   __init__modelreplace_gemm_with_matmulr   graph
value_infonamevalue_infosupdateoutputinputr   modestaticopset_versionfuse_dynamic_quantextra_optionsq_matmul_const_b_only	new_nodesgraph_scopetensor_namesnoder
   
ValueErrorcalculate_quantization_paramsquantization_paramsfixed_qrange_uint8_namefixed_qrange_int8_namefixed_zero_namefixed_zero_zp_namequantized_value_mapget_non_initializer_inputsgenerated_value_names)selfr%   per_channelreduce_ranger.   r/   weight_qTypeactivation_qTypetensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizer2   viotitr7   output_names                     \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/onnx_quantizer.pyr$   zONNXQuantizer.__init__&   s    	 	
 JJ//1:4::;K;KLE6;kk6L6LM6L6LMD##5;;;M;M$N;MRRWWb[;M$NO##5;;;L;L$M;LRRWWb[;L$MN"5)DJ	"&"4"4r"9%74;M;M%M%xRVRdRdewRx"  u{{7I7I!J7I"''1*7I!JK  u{{7H7H!I7H"''1*7H!IJJJ$$**//D$$%Tk1n%TU 0 99,,=dii[IJJ#'#E#E#G  (H$&E#+"1 $&  &*ZZ%J%J%L"K  N$N$M "K!I%Ts$   4L =LL
8L L+
L
c                 :   t         j                  j                  |d| j                  j                  j                        }t        |       t        || j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                  | j                   | j"                        }| |_        | j&                   | d|_        |j)                          |j                  j                  j*                  S )z
        generate submodel for the subgraph, so that we re-utilize current quantization implementation.
        quantize the submodel
        update subgraph and set it back to node
        onnx-quantizer)producer_nameopset_importsr   )onnxhelper
make_modelr%   opset_importr   r   rC   rD   r.   r/   rE   rF   rG   rH   rI   rJ   r2   parentr5   quantize_modelr'   )rB   subgraph	graph_keywarped_modelsub_quantizers        rO   quantize_subgraphzONNXQuantizer.quantize_subgrapho   s     {{--*****77 . 

 	<(%IIKK!!""!!%%
  $'+'7'7&81$E!$$&""((...    c                 6   |j                   D cg c]R  }|j                  t        j                  j                  k(  s'|j                  t        j                  j
                  k(  r|T }}t        |      dk(  r|S |j                  r|j                  n#|j                   dt        | j                         }i }|j                   D ]  }|j                  t        j                  j                  k(  r8|j                  | j                  |j                  | d|j                         i}n|j                  t        j                  j
                  k(  r_g }|j                  D ]?  }|j                  | j                  || d|j                   dt        |             g       A |j                  |i}nt        |      }|j                  |        t        j                   j"                  |j                  |j$                  |j&                  fd|j                  i|S c c}w )z|
        Check subgraph, if any, quantize it and replace it.
        return new_nodes added for quantizing subgraph
        r   _node_count_:r)   )	attributetyperT   AttributeProtoGRAPHGRAPHSlenr)   op_typer4   r^   ggraphsextendr   r+   rU   	make_noder-   r,   )	rB   r7   attrgraph_attrs	node_namekwargskvvaluerZ   s	            rO   quantize_node_with_sub_graphz*ONNXQuantizer.quantize_node_with_sub_graph   s    
&yyD//555dFYFYF`F`9` & 	 

 {q K!%DII4<<.SQUQ_Q_M`La0b	NNDyyD//555ii!7!79+Qtyyk@Z![\d11888 $HLL 22 (#,+Qtyyk3u:, G !, ii''-MM"# #$ {{$$T\\4::t{{eQUQZQZe^dee7
s   AHc                 V    t        d | j                  j                         D              S )zQ
        Detect if model already has QuantizeLinear or DequantizeLinear.
        c              3   \   K   | ]$  }|j                   d k(  xs |j                   dk(   & yw)QuantizeLinearDequantizeLinearN)ri   ).0r7   s     rO   	<genexpr>z.ONNXQuantizer.has_QDQ_nodes.<locals>.<genexpr>   s2      
_qW[DLL,,R@R0RR_qs   *,)anyr%   nodes)rB   s    rO   has_QDQ_nodeszONNXQuantizer.has_QDQ_nodes   s-      
_c_i_i_o_o_q
 
 	
r_   c                     t        || j                  j                               y| j                  | j                  j	                  |      S y)NTF)r   r%   initializerrX   find_initializer_in_path)rB   initializer_names     rO   r   z&ONNXQuantizer.find_initializer_in_path   sC    ($***@*@*BCO;;";;778HIIr_   c                     | j                   j                  |       |D ].  }|j                  D ]  }| j                  j	                  |        0 y N)r4   rl   r,   rA   add)rB   r|   r7   rN   s       rO   add_new_nodeszONNXQuantizer.add_new_nodes   s@    e$D#{{**..{;  + r_   c                    | j                         rt        j                  d       | j                  j	                         D ]  }| j
                  r| j                  |      }t        | j                        }t        | |      }|j                          t        |t        | j                              D ];  }| j                  |   j                  D ]  }| j                  j                  |        =  | j                          | j                  j!                         j#                  d       | j                  j!                         j$                  j'                  | j                         | j(                  B| j                  j+                         \  }}t        |      dkD  rt-        dt/        |      z         t0        | j                  j                  _        t4        | j                  j                  _        | j                  j                  j8                  D cg c]  }|j:                  t<        k(  s| }	}|	sk| j                  D cg c]  }|j:                  dk(  s| }
}|
r@| j                  j                  j8                  j                         }d|_        t<        |_        | j                  j                  S c c}w c c}w )NzPlease check if the model is already quantized. Note you don't need to quantize a QAT model. OnnxRuntime support to run QAT model directly.r7   r   z0Invalid model with unknown initializers/tensors.zcom.microsoftr   ) r}   loggingwarningr%   r|   enable_subgraph_quantizationrt   rh   r4   r   quantizeranger,   rA   r   _dequantize_outputsr'   
ClearFieldr7   rl   rX   clean_initializersRuntimeErrorstrr   rR   r   producer_versionrW   domainr   version)rB   r7   number_of_existing_new_nodesop_quantizerirN   _initializers_not_foundopsetms_opsetms_nodess              rO   rY   zONNXQuantizer.quantize_model   s    OOn
 JJ$$&D0088>+.t~~+>(,T48L!!#7T^^9LM#'>>!#4#;#;K..22;? $< N ' 	  " 	

%%f-

&&t~~6 ;;(,

(E(E(G%A%)*Q."#UX[\rXs#stt)5

&,7

)'+zz'7'7'D'Db'DeXaHaE'Db)-Z4;;/;YHZ

((5599; !(zz cZs   J6-J6J;J;c                     d| j                   v r3t        j                  d|| j                   d          | j                   d   S t        d|d      )NDefaultTensorTypezDget_tensor_type returns DefaultTensorType for tensor name %r, use %dz)Unable to find data type for weight_name=a7  . shape_inference failed to return a type probably this node is from a different domain or using an input produced by such an operator. This may happen if you quantize a model already quantized. You may use extra_options `DefaultTensorType` to indicate the default weight type, usually `onnx.TensorProto.FLOAT`.)r2   r   infor   rB   tensor_names     rO   _get_default_tensor_typez&ONNXQuantizer._get_default_tensor_type   sf    $"4"44LLV""#67
 %%&9::7 GI J
 	
r_   c                    t        || j                  j                               }||j                  S || j                  v r| j                  |   }|j
                  j                  d      rV|r4|j
                  j                  j                  dk(  r| j                  |      S |j
                  j                  j                  S | j                  r| j                  |r| j                  |      S y | j                  j                  |      }||S | j                  r+| j                  r| j                  j                  |      }||S |r| j                  |      S y )Ntensor_typer   )r   r%   r   	data_typer*   rd   HasFieldr   	elem_typer   r   rX   is_valid_quantize_weightget_tensor_type)rB   r   	mandatoryweightrK   otyperess          rO   r   zONNXQuantizer.get_tensor_type  s*   k4::+A+A+CD###$***!!+.Bww.!4!4!>!>!!C88EEww**44411t{{7J44[AA44[AL,,++--k:C
00==r_   c                 H   | j                  |      r| j                  |      S || j                  v r| j                  |   }|j                  j	                  d      rU|j                  j
                  j                  t        j                  j                  t        j                  j                  fv ryt        j                  d|d|j                   d       y| j                  r'| j                  r| j                  j                  |      S t        j                  d|d       y)	Nr   Tz<Inference failed or unsupported type to quantize for tensor z
, type is .Fz%Failed to infer data type of tensor: zS. Please add data type info for this tensor if your model has customized operators.)is_input_a_initializerr   r*   rd   r   r   r   
onnx_protoTensorProtoFLOATFLOAT16r   r   r   rX   is_float_tensor)rB   r   rK   s      rO   r   zONNXQuantizer.is_float_tensor  s   &&{300==$***!!+.Bww.2773F3F3P3P&&,,&&..U 4 OON{o]ghjhohogppqr ,,;;..{;;3K? C6 7	
 r_   c                     |t         j                  j                  k(  r| j                  |||      S |t         j                  j                  k(  r| j                  |||      S t        d| d      )a  
        Create nodes for dynamic quantization of input and add them to nodes_list.
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter qType: type to quantize to.
            parameter initial_type: type to quantize from
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        zUnexpected value for qType=r   )r   r   INT8+_get_dynamic_input_quantization_params_int8UINT8,_get_dynamic_input_quantization_params_uint8r8   )rB   
input_name
nodes_listqTypeinitial_types        rO   &_get_dynamic_input_quantization_paramsz4ONNXQuantizer._get_dynamic_input_quantization_params5  so     J**///CCJPZ\hiiJ**000DDZQ[]ijj6ugQ?@@r_   c                    t         j                  j                  }|dz   }|dz   }t        j                  j                  d|g|dz   g|d      }|j                  |       |dz   }t        j                  j                  d|g|dz   g|d      }	|j                  |	       |d	z   }
t        j                  j                  d
|j                  d   g|
dz   g|
      }|j                  |       |d	z   }t        j                  j                  d
|	j                  d   g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  d   |j                  d   g|dz   g|      }|j                  |       t        j                  j                  | j                  |g t        |      dz  g      }| j                  j                  |       |dz   }t        j                  j                  d|j                  d   | j                  g|g|      }|j                  |       t        j                  j                  | j                  |g dg      }| j                  j                  |       || j                  g g fS )az  
        Create nodes for dynamic quantization of input to int8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLOAT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        _scale
_ReduceMin	ReduceMin:0r   keepdims
_ReduceMax	ReduceMax_AbsAbs_Abs_MaxMaxg       @	scale_DivDiv)r   r   r   rT   rU   rm   appendr,   make_tensorr<   r   r%   add_initializerr>   )rB   r   r   r   r   input_scale_namereduce_min_namereduce_min_nodereduce_max_namereduce_max_nodereduce_min_abs_namereduce_min_abs_nodereduce_max_abs_namereduce_max_abs_nodeabs_max_nameabs_max_nodeinitializer_divscale_div_namescale_div_nodeinitializer_zps                       rO   r   z9ONNXQuantizer._get_dynamic_input_quantization_params_int8D  s    &&++ &0$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* .6"kk33##A&' 4'(	
 	-.-6"kk33##A&' 4'(	
 	-.!J.{{,, ''*,?,F,Fq,IJD !	
 	,'++11''!%(3./	
 	

""?3#k1..  #T%@%@A	
 	.) 001H1H%QSVWUXY

"">2!8!8"b@@r_   c                 .   t         j                  j                  }|dz   }|dz   }|dz   }t        j                  j                  d|g|dz   g|d      }|j                  |       |dz   }	t        j                  j                  d	|g|	dz   g|	d      }
|j                  |
       t        j                  j                  | j                  |g t        |      g      }| j                  j                  |       t        j                  j                  | j                  |g d
g      }| j                  j                  |       |dz   }t        j                  j                  d|
j                  d   |j                  d   g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  d   | j                  g|g|      }|j                  |       |dz   }t        j                  j                  d| j                  |j                  d   g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  d   |g|dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  |dz   g|      }|j                  |       |dz   }t        j                  j                  d|j                  |g||      }|j                  |       ||g g fS )a{  
        Create nodes for dynamic quantization of input to uint8 and add them to nodes_list
            parameter input_name: Name of the input.
            parameter nodes_list: new nodes are appended to this list.
            parameter initial_type: initial weight type (FLAOT or FLOAT16)
            return: scale_name, zero_point_name, scale_shape, zero_point_shape.
        r   _zero_pointr   r   r   r   r   r   r   g        
_scale_SubSub
_scale_Divr   _zero_point_Sub_zero_point_Div_zero_point_FloorFloor_zero_point_CastCast)to)r   r   r   rT   rU   rm   r   r   r;   r   r%   r   r=   r,   )rB   r   r   r   r   r   input_zp_namer   r   r   r   initializer_qrangeinitializer_qvaluescale_sub_namescale_sub_noder   r   zp_sub_namezp_sub_nodezp_div_namezp_div_nodezp_floor_namezp_floor_nodezp_cast_namezp_cast_nodes                            rO   r   z:ONNXQuantizer._get_dynamic_input_quantization_params_uint8  s6    &&,,%0"]2$|3++//Lt#$ 0 
 	/*$|3++//Lt#$ 0 
 	/* "[[44((!%()	
 	

""#56![[44T5I5I<Y[^a]bc

""#56 $l2..##A&(>(>q(ABd"#	
 	.)#l2..""1%t'C'CD	
 	.) !#44kk++!!?#9#9!#<=4 	
 	+& #44kk++"$454 	
 	+&"%88--g{7I7IM\`L`Kacpq-(!$66{{,,V]5I5IM?\hmr,s,'B66r_   c                 H   | j                   }||A| j                  || j                  vrt        j                  d| d       y| j                  |   }t	        |t
              st        dt        |       d|d      |t        |      dk7  rt        d| d	|       t        j                  |d
   g      }t        |d   d      r/|d   j                  t        j                  t        j                  fvrt        dt        |d          d|      t        j                  |d   g      }|j                  t        j                   k7  sJ |d   }n~t        j                  |g      }t        j                  |g      }| j                  |   }d|v r |d   j                  }|j#                  |      }|j                  t        j                   k7  sJ g }	|dz   }
g }|dz   }t$        j&                  j)                  |
||	|j+                         j-                               }| j.                  j1                  |       |j                  t        j                  k(  rt2        j4                  j6                  }nS|j                  t        j                  k(  rt2        j4                  j8                  }nt        d|j                   d|      t$        j&                  j)                  ||||j;                  d      j-                               }| j.                  j1                  |       d||
||	fS )a\  
        Create initializers and inputs in the graph for zero point and scale of output.
        Zero point and scale values are obtained from self.quantization_params if specified.
            parameter param_name: Name of the quantization parameter.
            return: result, scale_name, zero_point_name, scale_shape, zero_point_shape.
        z$Quantization parameters for tensor:"z" not specified)F r   r   r   Unexpected type  for r      zbQuantization parameters should contain zero point, scale, quant type. Specified values for output z: 
zero_pointscaledtypez and param_name=
quant_typer   r   zUnexpected dtype=z for param_name=)T)rF   r:   r   r   
isinstancer   	TypeErrorrd   rh   r8   nparrayhasattrr   float32float16float64astyperT   rU   r   raveltolistr%   r   r   r   r   r   reshape)rB   
param_name	use_scaleuse_zeropointzero_point_typeparamszero_point_valuesscale_valuesr   zero_point_shapezero_point_namescale_shape
scale_nameinit_zp
scale_type
init_scales                   rO   _get_quantization_paramsz&ONNXQuantizer._get_quantization_params  s    // 5''/:TE]E]3]CJ<_`,--j9Ff&89"24<.j^ST UVV~V!1 33=,bJ 
 !#&*>)? @6'?G4w8M8MVXV`V`bdblblUm8m #3D4I3JJZ[eZh!ijj88VG_$56L%%333$\2O "- 988YK0L--j9F& w--+2259%%333$}4(*
 ++))_.>@Q@W@W@Y@`@`@b
 	

""7++#//55J2::-#//77J01C1C0DDTU_Tbcdd[[,,Z[R^RfRfglRmRtRtRvw


"":.Z+?OOOr_   c           	         |j                   |   }|dk7  sJ d       |t        z   }|dz   }	|	|d||}}}
n| j                  |      \  }
}}}}g }|
r't        j                  j                  d|||g|g|	      }n| j                  ry| j                  rN|t        j                  j                  k(  r1|dz   }|dz   }t        j                  j                  d	|g|||g|	      }nU|J d
|d| d| d|        | j                  ||||      \  }}}}t        j                  j                  d|||g|g|	      }t        |||||      | j                  |<   g ||S )a  
        Given an input for a node (which is not a initializer), this function

        - add nodes to compute zero point and scale for this input if they don't exist.
        - add new QuantizeLinear node to quantize the input.

        :param node: node being quantized in NodeProto format.
        :param input_index: index of input in node.input.
        :param qType: type to quantize to.
        :param given_scale_name: if those inputs need to be quanitzed using this scale tensor.
        :param given_zp_name: if those inputs to be quantized using this zeropoint tensor.
        :param initial_type: type of the weight to quantize
        :return: List of newly created nodes in NodeProto format.
        r   z*Cannot access undefined variable in graph._QuantizeLinearNTrw   r   r   DynamicQuantizeLinearzCCannot quantize input without knowing the initial type, input_name=z, input_index=z, qType=z, node=r   )r-   r	   r  rT   rU   rm   r/   r1   r   r   r   r   r   r?   )rB   r7   input_indexr   given_scale_namegiven_zp_namer   r   rN   ql_node_name
data_foundr  zp_namer   r|   qlinear_noder  zp_shapes                     rO   _get_quantize_input_nodesz'ONNXQuantizer._get_quantize_input_nodes1  s   " ZZ,
RM!MM #;;!$55(}/H/35E}G
J484Q4QR\4]1J
GQ;;00 Z1	L {{ &&5J4J4J4P4P+P'(2
$}4#{{44+L *g6 	  $/ "",~k](SXRYY`ae`fh/ ??
ESXgs?t#{{44$W5 M 	  0>j+Wacjlq/r  ,%%%%r_   c                     || j                   v r| j                   |   S | j                  | j                  j                  |      S y r   )r?   rX   find_quantized_value)rB   r   s     rO   r,  z"ONNXQuantizer.find_quantized_valuew  sC    111++J77;;";;33J??r_   c           
         || j                   v r| j                   |   j                  S | j                   |   j                  }t        || j                  j                               }t        |      }|| j                   v r| j                   |   j                  }n5|| j                  v r| j                  |      \  }	}}	}	}	nt        d| d      t        || j                  j                               }
t        |
      }| j                  ||||      \  }}}}}}|| j                   vsJ t        ||||t        j                  |j                  dkD  rdnd||      }|| j                   |<   |S )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        z	Expected z5 to be in quantized value map for static quantizationr   r   N)	node_type
node_qtype)r?   q_namer  r   r%   r   r   r:   r  r8   quantize_bias_static_implr   r   Initializersize)rB   	bias_namer   weight_namebetaweight_scale_nameweight_initializerweight_scaler   r   inputscale_initializerinput_scalequantized_bias_namequantized_bias_scale_namequantized_bias_zp_namebias_scale_datar.  r/  quantized_values                      rO   quantize_bias_staticz"ONNXQuantizer.quantize_bias_static~  s    000++I6=== !44[ALL)*;TZZ=S=S=UV,-?@ 111#77
CNN4333+/+H+H+T(AAqy4ijkk!-.>

@V@V@X!Y+,BC **9k<QUV	
%"  8 8888(%"** %%)At!	
 />  +""r_   c                 ^    || j                   v xs || j                  v xs || j                  v S )zq
        only check for value info and newly generated tensor names, initializers are checked separately
        )r*   r6   rA   r   s     rO   contains_tensorzONNXQuantizer.contains_tensor  s=    
 D,,, ;t000;t999	
r_   c           	      2    | j                  ||dddd|      S )NFr  r7   indicesinitializer_use_weight_qTyperD   op_level_per_channelaxisfrom_subgraph_ONNXQuantizer__quantize_inputs)rB   r7   rF  rJ  s       rO   quantize_activationz!ONNXQuantizer.quantize_activation  s/    %%).!&' & 
 	
r_   c           	      2    | j                  ||d||||      S )NTrE  rK  )rB   r7   rF  rD   rH  rI  rJ  s          rO   quantize_weightzONNXQuantizer.quantize_weight  s1     %%)-%!5' & 
 	
r_   c           
         g }g }	g }
g }|D ]4  }|j                   |   }|| j                  v ra| j                  |   }|j                  |j                         |	j                  |j                         |
j                  |j
                         |s4|
j                  d       |j                  d       |	j                  d       t        || j                  j                               }|| j                  r=|r;| j                  |j                  |r| j                  n| j                  ||      \  }}}n/| j                  ||r| j                  n| j                  |      \  }}}|
j                  |       |	j                  |       |j                  |       | j                  |      r| j                  j!                  |dz   | j"                  | j                  j%                               }||j                   |   }|| j&                  v rr| j&                  |   }|j)                  d      sJ d| d       |j*                  j)                  d      sJ d| d       |j*                  j,                  j.                  }n(|| j0                  v sJ d|d	       | j0                  |   }| j3                  ||| j                  |
      }| y|r| j5                  |       n|j7                  |       |d   }|j8                  dk(  rY|
j7                  |j:                         |j                  |j                   d          |	j                  |j                   d          N|
j                  |j:                  d          |j                  |j:                  d          |	j                  |j:                  d          | j<                  f| j<                  j?                  ||g||||d      \  }}}}|
j                  |d          |j                  |d          |	j                  |d          tA        d| d| jB                          |
|	||fS )a  
        Given a node, this function quantizes the inputs as follows:
            - If input is an initializer, quantize the initializer data, replace old initializer
              with new initializer
            - Else, add QuantizeLinear nodes to perform quantization
            parameter node: node being quantized in NodeProto format.
            parameter indices: input indices to quantize.
            return: (List of quantized input names,
                     List of zero point names used for input quantization,
                     List of scale names used for input quantization,
                     List of new QuantizeLinear nodes created)
        r   r  rd   zvalue_info=z has no type.r   z is not a tensor.zshape inference failed for zF and attribute 'tensor_names' does not have any value for this tensor.r!  )NNNNr  rw   r      r   T)rG  rD   rH  rI  rJ  z!Invalid tensor name to quantize: z @graph scope)"r-   r?   r   r  r'  r0  r   r%   r   rC   quantize_weight_per_channelr)   rE   rF   quantize_initializerrC  find_node_by_namer4   r'   r*   r   rd   r   r   r6   r*  r   rl   ri   r,   rX   rL  r8   r5   )rB   r7   rF  rG  rD   rH  rI  rJ  scale_nameszero_point_namesquantized_input_namesr|   r"  
node_inputr@  r   q_weight_namer'  r  r(  r   r(   r   quantize_input_nodesparent_quantized_input_namesparent_zero_point_namesparent_scale_namesr   s                               rO   __quantize_inputszONNXQuantizer.__quantize_inputs  s[   .  ""KK0J T555"&":"::"F""?#=#=> ''(?(?@%,,_-C-CD%,,R0""2& ''+&z4::3I3I3KLK&##(<
 88#((-I))tOdOd$		%" :>9R9R#-I))tOdOd$:6M7J &,,]; ''0"":.%%j1#zz;;!22DNNDJJDTDTDV   '!%K!8J!T%5%55%)%5%5j%A
)226:ck*Ub<cc:)77Fs+V`UaarHssF'1'B'B'L'L  *T->->> 9* H+ ,>
 (,'8'8'D+/+I+Ik4+@+@| ,J ,( ,37$**+?@%9:#7#;L''+;;)001D1DE&&|'9'9!'<=$++L,>,>q,AB)001D1DQ1GH&&|':':1'=>$++L,?,?,BC( KK11 M1M!-)="& 2 0+& &,,-I!-LM""#5a#89 ''(?(BC !#DZLP]^b^n^n]o!pqqG #J %&6UJJr_   c                 f   |j                   | j                  v r<| j                  |j                      }|j                  |j                  |j                  fS | j                  ||||      \  }}}t        |j                   |||t        j                  d      }|| j                  |j                   <   |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        N)	r)   r?   r0  r'  r  quantize_initializer_implr   r   r2  )	rB   r   r   rD   keep_float_weightr@  rY  r'  r  s	            rO   rS  z"ONNXQuantizer.quantize_initializer\  s     ;;$222"66v{{CO&&''**  .2-K-KE<):.
*w

 )KK**
 1@  -gz11r_   c                    || j                   v r2| j                   |   }|j                  |j                  |j                  fS | j	                  |||||      \  }}}	t        |||	|t        j                  d       }|| j                   |<   |||	fS r   )r?   r0  r'  r   quantize_weight_per_channel_implr   r   r2  )
rB   r5  rE   channel_axisrD   ra  r@  rY  r'  r  s
             rO   rR  z)ONNXQuantizer.quantize_weight_per_channel}  s     $222"66{CO&&''**  .2-R-R|\CT.
*w
 )**
 1@  -gz11r_   c                    || j                   v rd|| j                  vrU| j                   |   }t        |j                  | j                  j                               }| j                  j                  j                  dk7  s%| j                  j                  j                  dk(  r0|.t        j                  j                  |      j                  dk(  sJ |dz   }| j                  j                  || j                  | j                  j                               }|H|j                  |j                  |j                  g}t        j                   j#                  d||g|      }|S ||j$                  d   k(  sJ y)a  
        Given a value (input/output) which is quantized, add a DequantizeLinear node to dequantize
        it back to float32 or float16
            parameter value_name: value to dequantize
            parameter new_nodes_list: List of new nodes created before processing current node
            return: None if there is already a DequantizeLinear node that dequantizes it
                    A DequantizeLinear node otherwise
        rQ   Nr   _DequantizeLinearrx   r   )r?   rA   r   r  r%   r   rR   rT   numpy_helperto_arrayr3  rT  r4   r'   r0  r'  rU   rm   r,   )rB   
value_namer@  
scale_initdqlinear_namedqlinear_nodedqlinear_inputsdequantize_nodes           rO   _dequantize_valuezONNXQuantizer._dequantize_value  sV    $2224KeKe9e"66zBO &o&@&@$**BXBXBZ[J zz--1AA

  ..2BBzG] ((11*=BBaGGG&)<<M JJ88X\XbXbXhXhXjkM$#**#..#++#
 #'++"7"7&*}# '& "]%9%9!%<<<<r_   c                     | j                   j                         j                  D ];  }| j                  |j                        }|!| j
                  j                  |       = y)z
        Dequantize output if it is quantized
            parameter new_nodes_list: List of new nodes created before processing current node
            return: List of new nodes created
        N)r%   r'   r,   ro  r)   r4   r   )rB   r,   rn  s      rO   r   z!ONNXQuantizer._dequantize_outputs  sM     jj&&(//F"44V[[AO*%%o6 0r_   c           	      F   | j                   y | j                          i }| j                   D ]q  }| j                   |   }t        |t              st	        dt        |       d|d      | j                  j                  |i       }| j                  }d|v r|d   j                  }d|v rd|v r|d   |d   }}n|t        j                  j                  k(  rt        ||j                  d         \  }}n|j                  d	|j                   d
         }|j                  d|j                   d         }	|j                  d| j"                        }
|j                  dd      }t%        |||
      \  }}t'        ||	|||
| j(                        \  }}t+        |||      ||<   t |S )Nr   r   r   )default_valr  r   r   r   rminr   rmax	symmetricrD   F)rD   ru  )r   r   r  )rG   adjust_tensor_rangesr  r   r  rd   tensor_quant_overridesget_per_tensor_overridesrF   r   rT   r   FLOAT8E4M3FNr   avg_stdgetrange_valueis_activation_symmetricr   r   min_real_ranger   )rB   r:   r   tdquant_overridesr  zeror   rs  rt  ru  rD   qminqmaxs                 rO   r9   z+ONNXQuantizer.calculate_quantization_params  s   %!!# --K##K0Bb*-"248*E+PQ RSS"99RRS^lnRoO..J.,\:FF
/)lo.M-l;_W=Uet//<<<5j"**Q-Pe&**62>>!3DE&**62>>!3DE+//T=Y=YZ	.22>5I4Zlfop
d.tT4yRVReRefe/ATY^ku/v,/ .2 #"r_   r   )F)NN)NNN)g      ?)FFr  F)TFFr  F)FF)TF)__name__
__module____qualname__r$   r^   rt   r}   r   r   rY   r   r   r   r   r   r   r  r*  r,  rA  rC  rM  rO  rL  rS  rR  ro  r   r9    r_   rO   r   r   %   s     FMR/> fD
<+ Z
"22ARAh\7|9Px aeD&L/#b
	
" "
. &*"AKF2L 2@%N
7 #r_   r   )"r   numpyr  rT   onnx.numpy_helperr   r   base_quantizerr   r   	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   r   r   r   r   r   r   registryr   r   r  r_   rO   <module>r     sK        & = ! !    $ (K#M K#r_   