
    g}G              
         d dl mZ d dlZd dlZd dlmZ d dlmZ d dlZ	d dl
Z
ddlmZmZ ddlmZ ddlmZ dd	lmZ d
dlmZ ej,                  ej.                  hZej2                  ej4                  hZej8                  ej:                  hZdhZdZ 	 	 	 	 	 	 	 	 ddZ!ejD                  ej4                  ej4                  dddddddf
	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZ# G d d      Z$y)    )annotationsN)Path)Any   )CalibrationDataReaderCalibrationMethod)	QuantType)StaticQuantConfig)TensorQuantOverridesHelper   )'MixedPrecisionTensorQuantOverridesFixerCastl        c                z    t        j                  d| d| j                   d| d| j                   d| d|        y )NzUnable to override z for z node's z because it has already been overridden! Check the initial quantization overrides provided to get_qnn_qdq_config() if the generated QDQ model does not run on QNN EP. Node name: z, z name: )loggingwarningop_typename)nodewhat_strtensor_nameio_kinds       r/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/execution_providers/qnn/quant_config.pywarn_unable_to_overrider      sJ     OO
hZuT\\N(7) L ii[7)7;-	A    FTc                z   |	"|t         j                  t         j                  hv }	t        | t        j
                        r| nt	        j                  | d      }t               }d}i }|j                  j                  D ]3  }|||j                  <   t        j                  j                  |      s2d}5 t        |rt        j                  |      ni       }|j!                         s+|r)t#        j$                  |||      }|j'                  ||       t)        ||||	|||      }|j                  j*                  D ].  }|j-                  |j.                         |j1                  |       0 dd|
|j3                         ||	|d}t5        d |j6                  D              }|j8                  dk  rJt:        j=                  t>              tA        fd|jC                         D              }|v s|v s|rd|d	<   tE        ||||tG        |jI                  tJ                    ||xs |jM                         tN        k\  |
      S )a  
    Returns a static quantization configuration suitable for running QDQ models on QNN EP.
    This is done primarily by setting tensor-level quantization overrides.

    Params:
        model_input: Path to the input model file or ModelProto.
        calibration_data_reader: Calibration data reader.
        calibrate_methode: The calibration method. Defaults to MinMax.
        activation_type: The default activation quantization type. Defaults to QUInt8.
        weight_type: The default weight quantization type. Defaults to QUInt8.
        per_channel: Global option that determines if a fixed set of operator types should be quantized per-channel.
            Defaults to false. Alternatively, use the tensor-level `init_overrides` to select individual operators
            and their quantization axes.

            If set, the quantization tool uses per-channel quantization for the following operator types and inputs:
                - Conv:
                    - input[1] on axis 0
                    - input[2] (bias) on axis 0
                - ConvTranspose:
                    - input[1] on axis 1
                    - input[2] (bias) on axis 0
        init_overrides: Initial tensor-level quantization overrides. Defaults to None. This function updates of a copy
            of these overrides with any necessary adjustments and includes them in the returned
            configuration object (i.e., config.extra_options['TensorQuantOverrides']).

            The key is a tensor name and the value is a list of dictionaries. For per-tensor quantization, the list
            contains a single dictionary. For per-channel quantization, the list contains either a dictionary for
            each channel in the tensor or a single dictionary that is assumed to apply to all channels. An 'axis'
            key must be present in the first dictionary for per-channel quantization.

            Each dictionary contains optional overrides with the following keys and values.
                'quant_type' = QuantType : The tensor's quantization data type.
                'axis' = Int             : The per-channel axis. Must be present for per-channel weights.
                'scale' =  Float         : The scale value to use. Must also specify `zero_point` if set.
                'zero_point' = Int       : The zero-point value to use. Must also specify `scale` is set.
                'symmetric' = Bool       : If the tensor should use symmetric quantization. Invalid if also
                                            set `scale` or `zero_point`.
                'reduce_range' = Bool    : If the quantization range should be reduced. Invalid if also
                                            set `scale` or `zero_point`. Only valid for initializers.
                'rmax' = Float           : Override the maximum real tensor value in calibration data.
                                            Invalid if also set `scale` or `zero_point`.
                'rmin' = Float           : Override the minimum real tensor value in calibration data.
                                            Invalid if also set `scale` or `zero_point`.
                'convert' = Dict         : A nested dictionary with the same keys for an activation
                                           tensor that should be converted to another quantization type.
                'convert["recv_nodes"] = Set : Set of node names that consume the converted activation,
                                               other nodes get the original type. If not specified,
                                               assume all consumer nodes get the converted type.
        add_qtype_converts: True if this function should automatically add "convert" entries to the provided
            `init_overrides` to ensure that operators use valid input/output types (activations only).
            Ex: if you override the output of an Add to 16-bit, this option ensures that the activation inputs
            of the Add are also up-converted to 16-bit and that data types for surrounding ops are converted
            appropriately. Refer to the documentation in mixed_precision_overrides_utils.py for additional details.
        activation_symmetric: True if activations should be quantized symmetrically (i.e, rmax == -rmin) by default.
            Defaults to false. For int8 and int16, this results in zero-point values of 0. For uint8 and uin16,
            the zero-point values are 128 and 32,768, respectively.
        weight_symmetric: True if weights should be quantized symmetrically (i.e., rmax == -rmin) by default.
            Defaults to None. If set to None, weight_symmetric is assumed true if the weight_type is a signed int.
        keep_removable_activations: Defaults to false. If true, "removable" activations (e.g., Clip or Relu) will not
                        be removed, and will be explicitly represented in the QDQ model. If false, these activations
                        are automatically removed if activations are asymmetrically quantized. Keeping these activations
                        is necessary if optimizations or EP transformations will later remove
                        QuantizeLinear/DequantizeLinear operators from the model.

    Returns:
        A StaticQuantConfig object
    F)load_external_dataTg-C6?)MinimumRealRangeDedicatedQDQPairQDQKeepRemovableActivationsTensorQuantOverridesActivationSymmetricWeightSymmetricCalibStridedMinMaxc              3  ^   K   | ]%  }|j                   d k(  s|j                   dk(  s"| ' yw) zai.onnxN)domain).0xs     r   	<genexpr>z%get_qnn_qdq_config.<locals>.<genexpr>   s)     _!3Aqxx2~U^I^a!3s   #--   c              3  &   K   | ]  }|v  
 y wN )r'   topset21_typess     r   r)   z%get_qnn_qdq_config.<locals>.<genexpr>   s     *jGi!1+=Gis   UseQDQContribOps)calibrate_methodactivation_typeweight_typeop_types_to_quantizeper_channeluse_external_data_formatextra_options)(r	   QInt8QInt16
isinstanceonnx
ModelProto
load_modelsetgraphinitializerr   external_data_helperuses_external_datar   copydeepcopyemptyr   create_from_modelapplyQnnCompatibilityOverridesr   addr   process_nodeget_dictnextopset_importversion	Q16_TYPESunionQ4_TYPESanyget_quant_typesr
   list
differenceOP_TYPES_TO_EXCLUDEByteSizeMODEL_SIZE_THRESHOLD)model_inputcalibration_data_readerr1   r2   r3   r5   init_overridesadd_qtype_convertsactivation_symmetricweight_symmetrickeep_removable_activationsstridemodelop_typesmodel_has_external_dataname_to_initializerr@   overrides_helperoverrides_fixer
qnn_compatr   r7   
onnx_opsetoverrides_have_opset21_typesr/   s                           @r   get_qnn_qdq_configrj   +   s)   b &9??I<L<L*MM k4??3 	__[UC 
 uH# {{..0;K,,-$$77D&*# /
 2Sa$--2Ogij!!#(:ASSe_
 	o/CD +J   T\\"% !
 #!'A 0 9 9 ;3+$M _!3!3__JB!1'**jGWGgGgGi*j'j$m+{m/KOk04M,-)'!("5"56I"JK"9"eU^^=MQe=e#	 	r   c                  d    e Zd ZdZ	 	 	 	 	 	 	 	 	 	 	 	 	 	 d
dZddZddZddZddZddZ	ddZ
y	)rH   z
    Helper that processes nodes to generate quantization overrides that make the resulting QDQ model
    compatible with QNN EP.
    c                    || _         || _        || _        || _        || _        || _        || _        | j                  | j                  | j                  | j                  d| _        y )N)MatMulLayerNormalizationSigmoidTanh)default_activation_qtypedefault_weight_qtyper]   r^   r5   	overridesinitializers_process_matmul_process_layernorm_process_sigmoid_process_tanhprocess_fns)selfrq   rr   r]   r^   r5   rs   rt   s           r   __init__z"QnnCompatibilityOverrides.__init__   sl     )A%$8!$8! 0&"( **"&"9"9,,&&	
r   c                d    | j                   j                  |j                        }|	 ||       y y r,   )ry   getr   )rz   r   
process_fns      r   rJ   z&QnnCompatibilityOverrides.process_node   s/    %%))$,,7
!t "r   c                R   | j                   t        vryd}d}t        d      D ]  }|j                  |   }|s|| j                  v }| j
                  j                  ||j                  |rdn| j                        }|j                   y|r4|j                  | j                   k(  r|j                  | j                  k(  r y|r|}|j                  t        v s|} |rJ|rG| j
                  j                  || j                   | j                  dd      }|st        |d|d       yyyy)	a  
        Overrides initializer input(s) to use the default weight type if:
        - The default weight type is 8-bit
        - One of the inputs is a 16-bit activation
        - The other input is an initializer (per-tensor quantized)

        This is necessary because the quantization tool does not assign MatMul or LayerNorm initializer
        inputs the default weight type. Instead, it assigns the default activation type.
        N   )default_qtype
quant_type	symmetricF)	overwritezquant_type/symmetriczinput weight)rr   Q8_TYPESrangeinputrt   rs   get_node_input_qtype_infor   rq   axisr   r   r^   rO   update_tensor_overridesr   )	rz   r   input_16bit_act_nameinput_weight_namei
input_name	is_weight
qtype_info
did_updates	            r   +_make_static_inputs_use_default_weight_typezEQnnCompatibilityOverrides._make_static_inputs_use_default_weight_type   s@    $$H4#  qAAJ"d&7&77IAA		&/dT5R5R B J * ))T-F-FF((D,A,AA$.!&&)3'1$3 8  $5??!#88tG\G\] @ J '.DFWYgh  %6r   c                P   |j                   dk(  sJ d|j                           | j                  s| j                  |       y |j                  D ]V  }|| j                  v xr || j
                  v}|s$| j
                  j                  || j                  | j                  d       X y )Nrm   zExpected MatMul, but got r   )	r   r5   r   r   rt   rs   r   rr   r^   )rz   r   r   is_weight_no_overridess       r   ru   z)QnnCompatibilityOverrides._process_matmul#  s    ||x'S+DT\\N)SS'<<TB **J%/43D3D%D%i[_[i[iIi"%66#'#<#<4K`K`a %r   c                   |j                   dk(  sJ d|j                           | j                  s| j                  |       y |j                  d   | j                  v xr |j                  d   | j
                  v}t        |j                        dkD  xrI |j                  d   xr8 |j                  d   | j                  v xr |j                  d   | j
                  v}|s|rt        d      y )Nrn   z%Expected LayerNormalization, but got r   r   zget_qnn_qdq_config() does not currently support the global per_channel option with LayerNormalization. Please try using custom overrides that make bias per-tensor quantized.)r   r5   r   r   rt   rs   len
ValueError)rz   r   has_weight_no_overrideshas_bias_no_overridess       r   rv   z,QnnCompatibilityOverrides._process_layernorm6  s    ||33k7\]a]i]i\j5kk3<<TB"&**Q-43D3D"D"lTU^b^l^lIl

Oa 4

14

1!2!224 

1T^^3	 	 #&; Z  '<r   c           	        |j                   dk(  sJ d|j                           | j                  j                  |j                  d   | j                        j
                  }|t        j                  k(  rt| j                  j                  |j                  d   |t        j                  dt        j                        t        j                  dt        j                        d       y|t        j                  k(  rt| j                  j                  |j                  d   |t        j                  dt        j                        t        j                  dt        j                        d       yy)	za
        Overrides 16-bit Sigmoid's output scale and zero-point as per QNN requirements.
        ro   zExpected Sigmoid, but got r   g      >dtyper   scale
zero_point       ?Nr   rs   get_node_output_qtype_infooutputrq   r   r	   QUInt16r   nparrayfloat32uint16r9   int16rz   r   output_types      r   rw   z*QnnCompatibilityOverrides._process_sigmoidM  s    ||y(U,Ft||n*UU(nn??KKND99

* 	 )+++NN22A"-XXm2::F"$((1BII"> I,,,NN22A"-XXm2::F"$((1BHH"= -r   c           	        |j                   dk(  sJ d|j                           | j                  j                  |j                  d   | j                        j
                  }|t        j                  k(  rt| j                  j                  |j                  d   |t        j                  dt        j                        t        j                  dt        j                        d       y|t        j                  k(  rt| j                  j                  |j                  d   |t        j                  dt        j                        t        j                  dt        j                        d       yy)	z^
        Overrides 16-bit Tanh's output scale and zero-point as per QNN requirements.
        rp   zExpected Tanh, but got r   r   r   i   r   Nr   r   s      r   rx   z'QnnCompatibilityOverrides._process_tanhi  s    ||v%O)@'OO%nn??KKND99

* 	 )+++NN22A"-XXm2::F"$((5		"B I,,,NN22A"-XXm2::F"$((1BHH"= -r   N)rq   r	   rr   r	   r]   boolr^   r   r5   r   rs   r   rt   zdict[str, onnx.TensorProto])r   onnx.NodeProto)__name__
__module____qualname____doc__r{   rJ   r   ru   rv   rw   rx   r-   r   r   rH   rH      sp    

"+
 (
 #	

 
 
 .
 2
25in&.8r   rH   )r   r   r   strr   r   r   r   )rY   zstr | Path | onnx.ModelProtorZ   r   r1   r   r2   r	   r3   r	   r5   r   r[   z&dict[str, list[dict[str, Any]]] | Noner\   r   r]   r   r^   zbool | Noner_   r   r`   z
int | Nonereturnr
   )%
__future__r   rC   r   pathlibr   typingr   numpyr   r;   	calibrater   r   quant_utilsr	   quantizer
   tensor_quant_overridesr   mixed_precision_overrides_utilsr   r9   r   rO   r8   QUInt8r   QInt4QUInt4rQ   rV   rX   r   MinMaxrj   rH   r-   r   r   <module>r      sp   #       A $ ) @ Ty001	OOY--.OOY--.h ! 
  	" +<*B*B!*!1!1&--=A#!&$(',Y-Y2Y (Y 	Y
 Y Y ;Y Y Y "Y !%Y Y Yx| |r   