
    
g                       d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	Z	d dl
Z
d dl
mZmZmZ d dl
mZ d dlmZmZmZmZ d dlmZ d d	lmZmZmZ 	 d d
lmZ 	 d dlmZmZ 	 d dl m!Z! dZ"dZ#dZ$dZ%dZ&dZ'dZ(dZ)dZ*dZ+i Z, e-e      D  ci c]  }  e. e/e|       e0      s e/e|       |  c} Z1 G d de      Z2 G d de      Z3 G d de      Z4 G d de      Z5ej                  jl                   e	jn                  d      ej                  jp                   e	jn                  d       ej                  jr                   e	jn                  d!      ej                  jt                   e	jn                  d"      ej                  jv                  eej                  jx                  eej                  jz                  eiZ>ej                  jp                   e	j~                  d e	j                  #       e	j~                  d$e	j                  #      fej                  jl                   e	j~                  d%e	j                  #       e	j~                  d&e	j                  #      fej                  jt                   e	j~                  d e	j                  #       e	j~                  d'e	j                  #      fej                  jr                   e	j~                  d(e	j                  #       e	j~                  d)e	j                  #      fej                  jz                   e	j~                  d e#       e	j~                  d*e#      fej                  jx                   e	j~                  d+e#       e	j~                  d,e#      fiZDej                  jp                   e	j~                  d e	j                  #       e	j~                  d-e	j                  #      fej                  jl                   e	j~                  d.e	j                  #       e	j~                  d&e	j                  #      fej                  jt                   e	j~                  d e	j                  #       e	j~                  d/e	j                  #      fej                  jr                   e	j~                  d0e	j                  #       e	j~                  d)e	j                  #      fiZEej                  jp                   e	j~                  d e	j                  #       e	j~                  d&e	j                  #      fej                  jl                   e	j~                  d1e	j                  #       e	j~                  d2e	j                  #      fej                  jt                   e	j~                  d e	j                  #       e	j~                  d)e	j                  #      fej                  jr                   e	j~                  d3e	j                  #       e	j~                  d4e	j                  #      fej                  jz                   e	j~                  d e#       e	j~                  d,e#      fej                  jx                   e	j~                  d5e#       e	j~                  d6e#      fiZFd7d8d9ZGdad:ZHdbd;ZId< ZJ	 	 	 	 dc	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddd=ZK	 dc	 ded>ZL	 	 da	 	 	 	 	 	 	 	 	 	 	 	 	 dfd?ZMdgd@ZNdgdAZOdhdBZPdidCZQ G dD dE      ZR G dF dG      ZS G dH dI      ZTdJ ZUdK ZVdL ZWdM ZXdjdNZYdO ZZdkdPZ[dldQZ\dmdRZ]dndSZ^dodTZ_dpdUZ`dodVZadpdWZbdqdXZcdrdYZddsdZZedtd[Zfdtd\Zgdud]Zhdud^Zidud_Zjdud`Zky# e$ r dZY w xY w# e$ r dZdZY w xY w# e$ r dZ!Y w xY wc c} w )v    )annotationsN)Enum)Path)
ModelProtoTensorProtoexternal_data_helper)onnx_pb)
make_graph
make_model	make_nodemake_tensor_value_info)ReferenceEvaluator)GraphOptimizationLevelInferenceSessionSessionOptionsfloat8e4m3fn)int4uint4)to_array_extendedzonnx.quantizez0.1.0zai.onnxzcom.microsoftQuantizeLinear_QuantizeLinear_InputDequantizeLinear_DequantizeLinear_Output
_quantizedl        c                  *    e Zd ZdZdZd Zed        Zy)QuantizationModer      c                    | j                   S Nnameselfs    Y/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/quant_utils.py__str__zQuantizationMode.__str__C       yy    c                D    	 t         |    S # t        $ r t               w xY wr    )r   KeyError
ValueError)modes    r%   from_stringzQuantizationMode.from_stringF   s)    	#D)) 	,	    N)__name__
__module____qualname__
IntegerOps
QLinearOpsr&   staticmethodr-    r(   r%   r   r   ?   s%    JJ  r(   r   c                  *    e Zd ZdZdZd Zed        Zy)QuantizedValueTyper   r   c                    | j                   S r    r!   r#   s    r%   r&   zQuantizedValueType.__str__R   r'   r(   c                D    	 t         |    S # t        $ r t               w xY wr    )r7   r*   r+   )vs    r%   r-   zQuantizedValueType.from_stringU   s)    	%a(( 	,	r.   N)r/   r0   r1   InputInitializerr&   r4   r-   r5   r(   r%   r7   r7   N   s%    EK  r(   r7   c                  N    e Zd ZdZdZdZdZdZdZdZ	d Z
ed	        Zed
        Zy)	QuantTyper   r                  c                    | j                   S r    r!   r#   s    r%   r&   zQuantType.__str__f   r'   r(   c                D    	 t         |    S # t        $ r t               w xY wr    )r>   r*   r+   )ts    r%   r-   zQuantType.from_stringi   s(    	Q< 	,	r.   c                
   | t         j                  k(  rt        j                  S | t         j                  k(  rt        j
                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S | t         j                  k(  rt        j                  S t!        d| d      )NzUnexpected value qtype=.)r>   QInt8r   INT8QUInt8UINT8QUInt16UINT16QInt16INT16QFLOAT8E4M3FNFLOAT8E4M3FNQUInt4UINT4QInt4INT4r+   r#   s    r%   tensor_typezQuantType.tensor_typep   s    9??"###9###$$$9$$$%%%9###$$$9***+++9###$$$9??"###24(!<==r(   N)r/   r0   r1   rI   rK   rQ   rO   rM   rU   rS   r&   r4   r-   propertyrW   r5   r(   r%   r>   r>   ]   sR    EFMFGEF   > >r(   r>   c                  *    e Zd ZdZdZd Zed        Zy)QuantFormatr   r   c                    | j                   S r    r!   r#   s    r%   r&   zQuantFormat.__str__   r'   r(   c                D    	 t         |    S # t        $ r t               w xY wr    )rZ   r*   r+   )formats    r%   r-   zQuantFormat.from_string   s)    	v&& 	,	r.   N)r/   r0   r1   	QOperatorQDQr&   r4   r-   r5   r(   r%   rZ   rZ      s%    I
C  r(   rZ   int8uint8int16uint16dtype   i   i  i i     i      ii  ii@   i i @  r@   zero_point_indexc                @   g }t        |      D ]  \  }}t        j                  t        |      t        j                        r%|j                  t        j                  |             n=t        |t        j                        r|j                  |       nt        d| d|       || k(  s|d   }|j                  t        j                  k(  s|j                  t        j                  k(  st        d|j                          t        |      dkD  rt        |      S |d   S )Nzarg z is not an array: rm   zzero_point cannot be r   r   )	enumeratenumpy
issubdtypetypenumberappendarray
isinstancendarray	TypeErrorre   float32float16lentuple)ro   argsnew_argsiar:   s         r%   _check_typer      s    H$1DGU\\2OOEKKN+5==)OOAd1#%7s;<<  Aww%--'177emm+C"7y ABB   "(ma/5?@Xa[@r(   c                   | t         v sJ d|  d       | t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  fv r/|dk7  rt        d|d      |j                  t        j                  k(  rt        j                  }nG|j                  t        j                  k(  rt        j                  }nt        d|j                   d      t        t!        t#        dg dgt$        j&                  j)                  d| g dg      	      t#        d
g ddg      gdt+        d|d       t+        d|d       gt+        d| d       g            }t-        |      }t/        |j1                  d ||d      d         S t         |    }	t3        | dd      \  }
}|t5        |
|      n|
}|t7        ||      n|}t        j8                  |j;                  t        j                        |z  j=                         |z         }t        j>                  ||||       t/        |j;                  |	            S )NUnexpected data type > requested. Only INT8, UINT8, INT16, and UINT16 are supported.r   z2zero_point is expected to be null for float 8 not rH   zUnexpected dtype Constant
zero_point)valuer   )Xscaler   Yqur   r   )r   r   F)reduce_range	symmetric)out) ONNX_TYPE_TO_NP_TYPE
onnx_protor   rR   FLOAT8E4M3FNUZ
FLOAT8E5M2FLOAT8E5M2FNUZNotImplementedErrorre   rr   r{   FLOATr|   FLOAT16r+   r   r
   r   onnxhelpermake_tensorr   r   r   runget_qmin_qmax_for_qTypemaxminasarrayastyperoundclip)qTypearrr   r   lowhigh	onnx_type
onnx_modelrefre   qminqmaxcliplowcliphigharr_fp32s                  r%   quantize_nparrayr      s-   %%e	ug%cde%++--))--	  ?%(Z[eZhhi&jkk99%#))IYY%--'#++I01=>>"Bdkk>U>UVbdikmpqor>s .0LseT	 *3	4@*7ItD (UD9:

  !,3774sU)CDQGHH %U+,URWX
d$'O#dC.&*&63tT?D==#**U]]";e"C!J!J!Lz!YZ

8WhH=8??5122r(   c           	        |dkD  s|dk  rt        d| d|       t        j                  | t        j                  d| j                              } t        j
                  |t        j                  d|j                              }|.t        || t        j                  || j                        z         }|rBt        j
                  t        j                  |       t        j                  |            }| } |}||k  sJ d|  d|        t        j                  || z
  t        j                        }t        j                  |t        j                        t        j                  |t        j                        z
  }t        j                  ||z        }	|	dk\  sJ d       |	t        j                  |j                        j                  k  rFt        j                  d|j                        }	t        j                  d|j                        }
|
|	gS |r^t        j                  t        j                  ||z   t        j                  d	t        j                        z        |j                        }
n:t        j                  t        j                  || |	z  z
        |j                        }
|	j                  |j                        }	|
|	gS )
a  Calculate the scale s and zero point z for the quantization relation
    r = s(q-z), where r are the original values and q are the corresponding
    quantized values.

    r and z are calculated such that every value within [rmin,rmax] has an
    approximate representation within [qmin,qmax]. In addition, qmin <= z <=
    qmax is enforced. If the symmetric flag is set to True, the interval
    [rmin,rmax] is symmetrized to [-absmax, +absmax], where
    absmax = max(abs(rmin), abs(rmax)).

    :parameter rmin: minimum value of r
    :parameter rmax: maximum value of r
    :parameter qmin: minimum value representable by the target quantization data type
    :parameter qmax: maximum value representable by the target quantization data type
    :parameter symmetric: True if the floating-point range should be made symmetric. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :return: zero and scale [z, s]

    r   Bqmin and qmax must meet requirement: qmin <= 0 <= qmax while qmin:, qmmax:rd   zqmin=z > qmax=z
scale isse      ?g       @)r+   rr   minimumrw   re   maximumr   r   absfloat64finfotinyr   r   )rminrmaxr   r   r   min_real_rangeabsmaxdrdqr   r   s              r%   compute_scale_zpr      s    ( ax4!8]^b]ccklpkqrss
 ==u{{1DJJ?@D==u{{1DJJ?@D !4nDJJ OOPuyy		$@ww4<55htf55<	TD[	6B	T	/%++d%--2X	XBKKR EA:#|#:u{{4::&+++Ctzz2[[$**5
   TD[EKK5==,QQRZ^ZdZdJ U[[u1D%ETZZXJTZZ(r(   c                   d}| t         vr| t        j                  k(  rddlm} ddlm} |}t        d      D cg c]
  } ||       }}t        j                  |D cg c]0  }t        j                  |      rt        j                  |      r/|2 c}t        j                        }nt        d|  d      |t         | <   n| t        j                  k(  rddlm} |}|t        d	|  d
      t        j                  t         |          }	t        j                  d|      }
t        j                  ||	z  |j                         }|
|gS c c}w c c}w )ar  Calculate the scale s for a float8 type (E4M3FN).
    The function assumes the coefficient distribution and the float 8
    distribution are similar to two gaussian laws.

    :return: zero and scale [z, s]

    More details in notebook `quantization_fp8.ipynb
    <https://github.com/microsoft/onnxruntime/blob/main/docs/python/notebooks/quantization_fp8.ipynb>`_.
    Nr   )float8e4m3_to_float32r      rd   zQuantization to element_type=z not implemented.zUnexpected element_type rH   )FLOAT8_DISTRIBUTIONSr   rR   onnx.numpy_helperr   #onnx.reference.custom_element_typesr   rangerr   rw   isnanisinfr{   r+   rz   stdre   )element_typer   zp_dtyper   r   r   
all_valuesfvaluesstd_f8zeror   s               r%   compute_scale_zp_float8r   9  s*    H//;333?H#H<A#JGJq/2JJG[[&TJqekk!nU[[QR^JT\a\i\iF <\NJ[\]]-3\*	11	1D2<.BCCYY+L9:F;;q)DKKfCII6E%=# HTs   E
E8EEc                   t        | t        j                        st        dt	        |        d      ||}nt        |       r| j                         nd}||}nt        |       r| j                         nd}t        j                  || j                        }t        j                  || j                        }t        j                  d| j                        }	|t        j                  k(  r?|rt        d      t        j                  |       }
t        ||
      \  }}	t        ||	d      S |t        j                   t        j"                  t        j$                  t        j&                  t        j(                  t        j*                  fv r_t-        |||	      \  }}t        |       rt/        ||||||      \  }}	n!t        j                  d|j                        }t        ||	d      S t1        d
| d      )a  
    Returns the zero_point and scale for the given data.

    :param data: The data for which to compute quantization parameters.
    :param quant_type: The quantization data type.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: zero point and scale
    z%Weight must be given as an array not rH   g        rd   r   z1Unsupported option reduce_range=True for float 8.r   rn   r   z Unexpected value for quant_type=)rx   rr   ry   rz   rt   r}   r   r   rw   re   r   rR   RuntimeErrorr   r   r   rJ   rL   rP   rN   rV   rT   r   r   r+   )data
quant_typer   r   r   rmin_overridermax_overrider   r   r   r   r   r   r   s                 r%   compute_data_quant_paramsr   ^  s   * dEMM*?T
|1MNN  YtxxzC  YtxxzC;;t4::.D;;t4::.DKK4::.E[---RSSiio3JD
E:uqAA  -ZQZ[
dt9 0tT4Tb cJQdjj9J:uqAA
7
|1E
FFr(   c                   t        | ||||||      \  }}|t        j                  k(  rt        || ||      }	t	        |	j                  t        j                        j                         dz  dk(        ret        j                  |       }
t        d|
j                          d|
j                          d|	j                          d|	j                          d	      |||	fS |t        j                  t        j                  t        j                  t        j                   t        j"                  t        j$                  fv rt        || ||      }	|||	fS t'        d| d      )al  
    :param data: data to quantize
    :param qType: data type to quantize to.
    :param symmetric: whether symmetric quantization is used or not.
    :parameter reduce_range: True if the quantization range should be reduced. Defaults to False.
    :parameter min_real_range: Minimum floating-point range (i.e., rmax - rmin) to enforce. Defaults to None.
    :parameter rmin_override: The value of rmin to use if not None. Otherwise, uses min(data).
    :parameter rmax_override: The value of rmax to use if not None. Otherwise, uses max(data).
    :return: minimum, maximum, zero point, scale, and quantized weights

    To pack weights, we compute a linear transformation

    - when data `type == uint8` mode, from `[rmin, rmax]` -> :math:`[0, 2^{b-1}]` and
    - when data `type == int8`, from `[-m , m]` -> :math:`[-(2^{b-1}-1), 2^{b-1}-1]` where
        `m = max(abs(rmin), abs(rmax))`

    and add necessary intermediate nodes to transform quantized weight to full weight using the equation

    :math:`r = S(q-z)`, where

    - *r*: real original value
    - *q*: quantized value
    - *S*: scale
    - *z*: zero point
    rg   z+One of the quantized value is NaN data in [z, z], quantized_data in [z].zUnexpected value for qType=rH   )r   r   rR   r   anyr   rr   ra   ravelr   r   r   r   rJ   rL   rP   rN   rV   rT   r+   )r   r   r   r   r   r   r   r   r   quantized_datanp_datas              r%   quantize_datar     sZ   8 2J ((()%ujI%%ekk288:S@SHImmD)G=gkkm_Bw{{}o ^&&4&8&8&:%;2n>P>P>R=SSUW  5.00  *%ujI5.00
25';
<<r(   c                   t        |       }d}|t        ||j                         ||      }n|j                  |   }t	        |j                        }	d|	|<   g }
t        |      D ]m  }|j                  ||      }||   }||   }t        ||j                         ||      }|
j                  t        j                  |      j                  |	             o t        j                  |
|      }|r|n| j                   t         }|t        j                  j                   k(  r"t        j                         }||_        |j$                  j'                  | j$                         ||_        |j)                         j+                         j-                         |_        t0        t1        |      }|j                  |j                  k7  s!|j-                         |j-                         k7  r]t3        d|j                   d|j-                         dd  d|j-                         dd  d| j                   dt5        |      dd	  d
      |S |t        j                  j6                  t        j                  j8                  fv r|j:                  t        j<                  t        j>                  fvrt3        d| d      tA        tC        |j-                                     }t        jD                  jG                  ||| j$                  |d      }|S t        jD                  jI                  |      }t        j                  ||      j                  | j$                        }t        jJ                  jM                  ||      }|S )aG  
    Returns a quantized version of the given ONNX initializer.

    :param weight: The ONNX initializer to quantize.
    :param quant_type: The final quantized data type.
    :param zero_point: The zero-point value to use for quantization.
    :param scale: The scale value to use for quantization.
    :param axis: The quantization axis if quantizing per-channel. Defaults to None.
    :param quant_weight_name: The name of the quantized initializer.
                              If not specified, the quantized name is generated.
    :return: The quantized ONNX initializer.
    Nr   zThe initializer of shape z! could not be created, expecting 
   z, got z and shape=z
raw=   rH   zQuantized weights for z. must be 8-bit before packing as 4-bit values.T)rawrd   )'tensor_proto_to_arrayr   r   shapelistr   takerv   rr   r   reshapeconcatenater"   TENSOR_NAME_QUANT_SUFFIXr   r   rR   	data_typedimsextendflattencopytobytesraw_datar   r   strrV   rT   re   r`   ra   bytespack_bytes_to_4bitr   r   tensor_dtype_to_np_dtypenumpy_helper
from_array)weightr   r   r   axisquant_weight_nameweight_dataq_weight_datachannel_countchannel_dimsquantized_channel_data_listr   channel_datachannel_scalechannel_zero_pointquantized_channel_dataq_weight_nameq_weight_initializercheckpacked_dataquant_np_dtypes                        r%   quantize_onnx_initializerr    s-   ( (/K*.M|([5F5F5H%Q[\#))$/K--.T&(#}%A&++At4L!!HM!+A%5L..0-AS&" (..u}}=S/T/\/\]i/jk & ))*EtL):%6;;-PhOi@jMT%%222#//1)3&!!((5$1!(5(=(=(?(D(D(F(N(N(P%( &&:;E{{k///5==?mF[F[F]3]"/0A0A/BBc$,,.s34F5==?3B;O:PP[\b\h\h[iS!56t<=Q@ (   
((--t/?/?/E/EF	Fuzz5;;&??!7Ftuvv .}/D/D/FGH  ${{66}jRXR]R]_jpt6u  	 ==jIm>JRRSYS^S^_#00;;M=Yr(   c                j   | t         j                  j                  k(  rt        d      d}|rt        j                  |       }n)|r| t        v r
t        |    }nt        j                  |       }|st        d|  d      |\  }}|dkD  s|dk  r't        d| d| d|j                   d	| d
| d|        |S )z
    Return qmin and qmax, the minimum and maximum value representable by the given qType
    :parameter qType: onnx.onnx_pb.TensorProto.UINT8 or onnx.onnx_pb.TensorProto.UINT8
    :return: qmin, qmax
    z;This function is not implemented for float 8 as not needed.Nr   r   r   r   r   z, dtype=z, reduce_range=z, symmetric=z, qType=)
r   r   rR   r   ONNX_INT_TYPE_REDUCED_RANGEgetONNX_INT_TYPE_SYMMETRIC_RANGEONNX_INT_TYPE_RANGEr+   re   )r   r   r   qranger   r   s         r%   r   r   (  s     
&&333!"_``F,007	u ==.u5$((/07uvwwJD$ax4!86$x

|?<. Y"8E74
 	
 Mr(   c                .    t        | ||      \  }}||z
  S )z
    Helper function to get the quantization range for a type.
        parameter qType: quantization type.
        return: quantization range.
    r   )r   )r   r   r   r   r   s        r%   get_qrange_for_qTyper  H  s      )	RJD$$;r(   c                :    | dk  r| |z   n| }|dk\  xr ||k  }||fS )z
    Helper function that tries to return a normalized axis in the range [0, rank - 1].
    :parameter axis: The axis to normalize.
    :parameter rank: The tensor rank (number of dimensions).
    :return (is_valid, axis_norm)
    r   r5   )r   rank	axis_normis_valids       r%   normalize_axisr  R  s3      $axtTIA~2)d"2HYr(   c                    t        |       }|dk(  r
t               S |dz   dz  }t        |      }d}d}||dz
  k  r-| |dz      dz  dz  | |   dz  z  ||<   |dz  }|dz  }||dz
  k  r-||k  r| |   dz  ||<   |S )aB  
    Copies a source array of 8-bit values into a destination bytearray of packed 4-bit values.
    Assumes that the source values are already in the appropriate int4 range.
    :parameter src_8bit: The 8-bit element values to pack.
    :return A bytearray with every two 8-bit src elements packed into a single byte.
    r   r   r?   rh   rA   )r}   	bytearray)src_8bit	num_elemsdst_sizedstsrc_idst_is         r%   r   r   ^  s     HIA~{A!#H
H
CEE )a-
	*S0Q68E?S;PQE


 )a-

 ye_s*E
Jr(   c                      e Zd ZdZg g dfdZy)QuantizedInitializerzJ
    Represents a linearly quantized weight input from ONNX operators
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y r    )	r"   initializerrminsrmaxszero_pointsscalesr   r   r   )
r$   r"   r$  r%  r&  r'  r(  r   r   r   s
             r%   __init__zQuantizedInitializer.__init__  sF     	&

&	,	r(   r/   r0   r1   __doc__r)  r5   r(   r%   r"  r"  |  s     r(   r"  c                       e Zd ZdZ	 	 	 	 ddZy)QuantizedValuezI
    Represents a linearly quantized value (input\output\intializer)
    Nc
                    || _         || _        || _        || _        || _        || _        || _        || _        |	| _        y r    )	original_nameq_name
scale_namezp_name
value_typer   	node_type
node_qtype
scale_type)
r$   r"   new_quantized_namer1  zero_point_namequantized_value_typer   r4  r5  r6  s
             r%   r)  zQuantizedValue.__init__  sD     "($&.	"$$r(   )NNNNr*  r5   r(   r%   r-  r-    s     %r(   r-  c                      e Zd ZdZd Zy)BiasToQuantizez+
    Represents a bias to be quantized
    c                .    || _         || _        || _        y r    )	bias_name
input_nameweight_name)r$   r=  r>  r?  s       r%   r)  zBiasToQuantize.__init__  s    "$&r(   Nr*  r5   r(   r%   r;  r;    s    'r(   r;  c                   | j                   dk(  rt        d| j                   d      | j                   dk(  r| j                  }n#| j                   dk(  r| j                  }n| j                   dk(  r| j
                  }n| j                   dk(  r| j                  }n| j                   dk(  r| j                  }n| j                   d	k(  r| j                  }n| j                   d
k(  r| j                  }nz| j                   dk(  r| j                  }n^| j                   dk(  r| j                  }nB| j                   dk(  r| j                  }n&t        d| j                   d| j                    d      | j                  |iS )z
    Convert attribute to kwarg format for use with onnx.helper.make_node.
        :parameter attribute: attribute in AttributeProto format.
        :return: attribute in {key: value} format.
    r   z
attribute z does not have type specified.r   r?   r@   rA   rB   rC   ri      	   r   z has unsupported type rH   )rt   r+   r"   r   r   srF   gfloatsintsstringstensorsgraphs)	attributer   s     r%   attribute_to_kwargrK    s;    ~~:inn%55STUU ~~	1		1		1		1		1	  	1		1	!!	1	!!	2	  :inn%55KINNK[[\]^^NNE""r(   c                t    |D cg c]  }|j                   | k(  s| }}t        |      dkD  r|d   S dS c c}w )z
    Helper function to find item by name in a list.
        parameter item_name: name of the item.
        parameter item_list: list of items.
        return: item if found. None otherwise.
    r   N)r"   r}   )	item_name	item_listitemitemss       r%   find_by_namerQ    sB     (Bid499	+ATiEB5zA~58/4/ Cs   55c                R    d}t        t        |            D ]  }||   | k(  s|} |S )zC
    Helper function to return index of an item in a node list
    rm   )r   r}   )	elem_name	elem_listelem_idxr   s       r%   get_elem_indexrV    s4     H3y>"Q<9$H # Or(   c                H    t         j                  j                  d| |g|      S )z
    Helper function to create a Mul node.
        parameter inputs: list of input names.
        parameter output: output name.
        parameter name: name of the node.
        return: Mul node in NodeProto format.
    Mul)r   r   r   )inputsoutputr"   s      r%   get_mul_noder[    s!     ;;  $??r(   c                l    | j                   j                  | j                  |z   | j                  z         S )zp
    Helper function to generate a identifiable filepath by concatenating the given identifier as a suffix.
    )parentjoinpathstemsuffix)filename
identifiers     r%   generate_identified_filenamerc    s+     ??##HMMJ$>$PQQr(   c                `   dd l }dd lm} dd l} |j                  |j
                         t        d       t        |        t        d       t        |       |j                  | |d       |j                  d       |j                  d       |j                  d	       |j                          y )
Nr   )	thresholdz
Histogram:zHistogram Edges:T)fillzTensor valueCountszTensor value V.S. Counts)sysmatplotlib.pyplotpyplotrr   set_printoptionsmaxsizeprintstairsxlabelylabeltitleshow)hist
hist_edgesrh  pltrr   s        r%   
apply_plotrv    s    #ES[[1	,	$K	
	*JJtZdJ+JJ~JJxII()HHJr(   c           	     B	   ddl ddl}ddlddlmc mc m} ddlmc mc m} ddl	m
mm t        j                  d|          G fddj                        }j!                  | |      }t#        t$        j&                  j)                  |d      d	      5 }|j+                  |       ddd       j-                  d      }|j/                  d
      }	g }
t1        | j3                               D ]  }| |   }|j5                         }t7        |j9                  d|      j;                               t7        |j9                  d|      j;                               g}t=        t?        |            }|	jA                  |      }|	jA                  |      }|jC                  |	       |jE                  |	|       |jG                  |	|       |jI                  |	      }|
jK                  |        |jM                  |	tO        |
             |
D ]  }|	jQ                  |        |	jS                         }|jU                  |	       |jW                  |	|       |jY                  |	      }|	j[                  |       |	j]                         }t#        t$        j&                  j)                  |d      d      5 }|j+                  |       ddd       t$        j^                  j9                  dd      dv r|j                  ja                  |d      }|jc                         }te        |      D ]Y  }|jg                  |      }t        j                  |ji                                t        j                  |jk                                [ t#        t$        j&                  j)                  |d      d	      5 }t1        | j3                               D ]  }| |   }|j5                         }t7        |j9                  d|      j;                               t7        |j9                  d|      j;                               g}|dz   t=        t?        |            z   }|j+                  |       |j+                  d        	 ddd       y# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   yxY w)z>
    Helper function to write calibration table to files.
    r   N)CalibrationMethod
TensorDataTensorsDatazcalibration cache: c                       e Zd Z fdZy)*write_calibration_table.<locals>.MyEncoderc                N   t        |f      r|j                         S t        |j                        r'|j                         t	        |j
                        ddS t        |      r"|j                  j                  t	        |      dS j                  j                  | |      S )Nznumpy.array)r   re   CLS)r~  r   )
rx   to_dictry   tolistr   re   	__class__r/   JSONEncoderdefault)r$   objrx  ry  rz  jsonnps     r%   r  z2write_calibration_table.<locals>.MyEncoder.default2  s    #
K89{{}$#rzz* #

s399~m\\#01"}}55CII##++D#66r(   N)r/   r0   r1   r  )rx  ry  rz  r  r  s   r%   	MyEncoderr|  1  s    	7 	7r(   r  )clszcalibration.jsonwi   highestlowestzcalibration.flatbufferswbQUANTIZATION_DEBUG)r   1zcalibration.cache 
)6r  flatbuffersrr   5onnxruntime.quantization.CalTableFlatBuffers.KeyValuequantizationCalTableFlatBuffersKeyValue5onnxruntime.quantization.CalTableFlatBuffers.TrtTableTrtTable"onnxruntime.quantization.calibraterx  ry  rz  logginginfor  dumpsopenospathjoinwriterw   Buildersortedkeysr  floatr  rO  r   r   CreateStringKeyValueStartKeyValueAddKeyKeyValueAddValueKeyValueEndrv   TrtTableStartDictVectorr}   PrependUOffsetTRelative	EndVectorTrtTableStartTrtTableAddDictTrtTableEndFinishOutputenvironGetRootAsTrtTable
DictLengthr   DictKeyValue)calibration_cachedirr  r  r  r  	json_datafiler   builderkey_value_listkeyr   d_valuesrE  r   flat_key
flat_value	key_value	main_dict	cal_tablebufdict_lenr   rx  ry  rz  r  r  s                           @@@@@r%   write_calibration_tabler  !  s   
 LLLL]]LL&'8&9:;7 7D$$ 7 

,)
<I	bggll3 23S	9T

9 
: 88A;D!!$'GN',,./"3'>>#(,,y$/4467(,,x.3356
 CK '',))%0
w'2!!':6((1	i(# 0& $$Wc..AB#	''	2 $!!#I7#Wi0$$W-INN9
..
C	bggll3 9:D	AT

3 
B 
zz~~*A.(:%%77Q?	'')xA!q)ILL)LL*+ ! 
bggll3 34c	:d+0023C&s+F~~'Hhll9d388:;hll8T2779:F #ICK 00EJJuJJt 4 
;	:g 
:	9L 
B	A 
;	:s%   $Q;6R/CR;RRRc                   | dk(  j                  t        j                        }| dk7  j                  t        j                        }|j                         }| j                  |z
  }|sy|t        |      z  t        |      z  }|dk  sJ d|||fz         | j                  t        j                        }|||z  | |z  z   z  }|dk  j                         dk(  sJ |S )a~  Given a discrete distribution (may have not been normalized to 1),
    smooth it by replacing zeros with eps multiplied by a scaling factor
    and taking the corresponding amount off the non-zero values.
    Ref: http://web.engr.illinois.edu/~hanj/cs412/bk3/KL-divergence.pdf
         https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    r   Nr   z"n_zeros=%d, n_nonzeros=%d, eps1=%f)r   rr   r{   sumsizer  )pepsis_zerosis_nonzerosn_zeros
n_nonzeroseps1rs  s           r%   smooth_distributionr  }  s     Qu}}-H6//%--0KllnG'!Jw%
"33D#: ;?  : 88EMM"DC(Nte{222DAI??!!!Kr(   c                    t        j                  | j                         d      }|j                  j                  D ]  }t        j                  |      s y y)NF)load_external_dataT)r   loadas_posixgraphr$  r   uses_external_data)
model_pathmodel
intializers      r%   model_has_external_datar    sE    IIj))+FEkk--
22:> . r(   c                    t               }|j                         |_        t        j                  |_        i }dg|d<   t        | j                         |fddgi|}y)z
        Generate model that applies graph optimization (constant folding, etc.)
        parameter model_path: path to the original onnx model
        parameter opt_model_path: path to the optimized onnx model
    :return: optimized onnx model
    ConstantSharingdisabled_optimizers	providersCPUExecutionProviderN)r   r  optimized_model_filepathr   ORT_ENABLE_BASICgraph_optimization_levelr   )r  opt_model_pathsess_optionkwargs_s        r%   optimize_modelr    sb     !"K+9+B+B+DK(+A+R+RK(F%6$7F !,,.jH^G_jcijAr(   c                    ddi}| j                   r8| j                   D ])  }|j                  |j                  |j                  i       + t        j
                  j                  | |       y)z>Tag the model that it went through quantization pre-processingonnx.quant.pre_processonnxruntime.quantNmetadata_propsupdater  r   r   r   set_model_props)r  r  props      r%   add_pre_process_metadatar    sU    .0CDN((D!!488TZZ"89 )KK~6r(   c                    | j                   r2| j                   D ]#  }|j                  dk(  s|j                  dk(  s# y y)zCCheck the model whether it went through quantization pre-processingr  r  TFr  r  r   )r  r  s     r%   model_has_pre_process_metadatar    s<    ((Dxx33

FY8Y ) r(   c                    ddi}| j                   r8| j                   D ])  }|j                  |j                  |j                  i       + t        j
                  j                  | |       y )N
onnx.inferr  r  )r  r  r  s      r%   add_infer_metadatar    sU    "$78N%%A!!155!''"23 &KK~6r(   c                    | j                   r2| j                   D ]#  }|j                  dk(  s|j                  dk(  s# y y)Nr  r  TFr  )r  r  s     r%   model_has_infer_metadatar    s;    %%Auu$4G)G & r(   c                    t        | d      }t        j                  j                  t	        |       t	        |             t        j
                  |j                               }t        |       |j                          |S )Nz	-inferred)	rc  r   shape_inferenceinfer_shapes_pathr   r  r  r  unlink)r  inferred_model_pathr  s      r%   load_model_with_shape_inferr     s`    6z;O**3z?C@S<TUII)2245Eu Lr(   c                    t        j                  d      5 }t        |      j                  d      }t	        j
                  | |j                         d       t        |      cd d d        S # 1 sw Y   y xY w)Nz
ort.quant.)prefixz
model.onnxT)save_as_external_data)tempfileTemporaryDirectoryr   r^  r   
save_modelr  r   )r  quant_tmp_dirr  s      r%   &save_and_reload_model_with_shape_inferr    sT    		$	$L	9]-(11,?
z224DQ*:6 
:	9	9s   AA,,A5c                   | j                   t        j                  j                  t        j                  j                  fv rt
        j                  j                  |       S t        d| j                   dt        | j                             )Nz&Only float type is supported. Weights z is )r   r   r   r   r   r   r   to_arrayr+   r"   type_to_name)r$  s    r%   r   r     su    !7!7!=!=z?U?U?]?] ^^  ))+66

01A1A0B$|T_TiTiGjFkl r(   c                    | dz   S )N_QuantizeLinearr5   tensor_names    r%   add_quant_suffixr    s    ***r(   c                    | t         z   S r    )QUANT_INPUT_SUFFIXr  s    r%   add_quant_input_suffixr    s    +++r(   c                    | dz   S )N_QuantizeLinear_Outputr5   r  s    r%   add_quant_output_suffixr    s    111r(   c                    | dz   S )N_DequantizeLinearr5   r  s    r%   add_dequant_suffixr    s    ,,,r(   c                    | dz   S )N_DequantizeLinear_Inputr5   r  s    r%   add_dequant_input_suffixr    s    222r(   c                    | t         z   S r    )DEQUANT_OUTPUT_SUFFIXr  s    r%   add_dequant_output_suffixr    s    ...r(   )NN)FN)FNNN)r   numpy.ndarrayr   onnx.TensorProto.DataTyper   boolr   r"  r   float | Noner   r#  r   r#  returnz#tuple[numpy.ndarray, numpy.ndarray])r$  z2tuple[numpy.ndarray, numpy.ndarray, numpy.ndarray])r   onnx.TensorProtor   r!  r   r   r   r   r   z
int | Noner   z
str | Noner$  r%  )FF)r   intr  r&  r$  ztuple[bool, int])r  r   r$  r  )ra  r   rb  r   r$  r   )rH   )g-C6?)r  r   )r  r   r  r   )r  r   )r  r   r$  r"  )r  r   r$  r   )r  r   r$  r   )r$  r   r$  r   )r  r   r$  r   )r$  r   )l
__future__r   r  r  r  enumr   pathlibr   rr   r   r   r   r   r	   r   onnx.helperr
   r   r   r   onnx.referencer   onnxruntimer   r   r   r   r   ImportErrorr   r   onnx.reference.op_runr   __producer____version__onnx_domain	ms_domainQUANT_OP_NAMEr  DEQUANT_OP_NAMEr  r   MODEL_SIZE_THRESHOLDr   r  rx   getattrr&  r  r   r7   r>   rZ   rJ   re   rL   rP   rN   rR   rV   rT   r   rw   ra   r`   rc   rb   r  r  r  r   r   r   r   r   r   r  r   r  r  r   r"  r-  r;  rK  rQ  rV  r[  rc  rv  r  r  r  r  r  r  r  r  r   r  r   r  r  r  r  r  r  )ks   0r%   <module>r8     sC   #  	      > > & Q Q - P P@?
7 	 , $2 ' !  474Dq4Dq
SZ[fhiSjloHpQ'*4Dqt  #> #>L$   V!4  +%++g"6  +%++g"6!!;5;;x#8''  %    ;5;;q#DkekkRU]b]h]hFi"j+%++d%**"E{u{{SV^c^h^hGi!j!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q  ;5;;q#>BV[@\"]+%++b"={u{{1TX?Y!Z    ;5;;q#DkekkRU]b]h]hFi"j+%++d%**"E{u{{SV^c^h^hGi!j!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q	!    ;5;;q#DkekkRU]b]h]hFi"j+%++c"DkekkRT\a\f\fFg!h!!KEKK$FTYafamamHn#o  ;5;;vU[[#I;5;;W\didodoKp"q  ;5;;q#={u{{1TX?Y"Z+%++b"={u{{1TX?Y!Z  )+ A 13h<~"R #'"&"&;G
;G);G ;G 	;G
 !;G  ;G  ;G );G~ hl:=7:=D $(L L )L  L  	L 
 L  "L  L ^@	< >% %8' '"#J0@R$Yx:k 777+,2-3/Q  L  DE  $ rsB   \7 ] $] 
]# ]#7]]	]]] ] 