
    	g                        d dl Z d dlZd dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZmZmZ d dlZd dlZd dlmZmZmZmZ d dlZddlmZmZmZ dej6                  d	ej6                  d
ej6                  fdZ	 	 d-dej6                  d	ej6                  dee   ded
ej6                  f
dZ G d d      Z  G d d      Z! G d de      Z" G d de jF                        Z$ G d d      Z% G d de%      Z& G d de%      Z' G d de'      Z( G d  d!e'      Z) G d" d#e'      Z* G d$ d%e jF                        Z+ G d& d'e+      Z,dd(e"jZ                  d)i fd*ee.ef   d+eee.      fd,Z/y).    N)Enum)Path)DictOptionalSequenceTupleUnion)
ModelProtoTensorProtohelpernumpy_helper   )
apply_plotload_model_with_shape_infersmooth_distributionpkqkreturnc                    t        j                  | j                  | j                        }| dd t        j                  | dd |dd z        z  |dd | dk(  |dk\  z  }d||<   | dkD  |dkD  z  }t         j
                  || <   |S )z
    See https://docs.scipy.org/doc/scipy/reference/generated/scipy.special.rel_entr.html#scipy.special.rel_entr.
    Python implementation.
    dtypeNr   )npemptyshaper   loginf)r   r   resc2c1s        W/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/calibrate.pyrel_entrr!      s    
 ((288288
,CURVVBqEBqEM**CF
'bAg	BCG
q&R!V	BvvCHJ    baseaxisc                 R   ||dkD  sJ d       |J d       t        j                  |       j                  t         j                        } d| z  t        j                  | |d      z  } t        j                  |      j                  t         j                        }t        j
                  | |      \  } }d|z  t        j                  ||d      z  }t        | |      }t        j                  ||      }||t        j                  |      z  }|j                  | j                        S )z
    Simplifeied version of entropy.
    Source: https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html.
    This avoids taking a dependency on scipy just for this function.
    r   z0base={base} must be a positive number or `None`.z
qk is None      ?T)r$   keepdimsr$   )	r   asarrayastypefloat32sumbroadcast_arraysr!   r   r   )r   r   r#   r$   vecss         r    entropyr0   '   s     <4!8W%WW#>'<'>	B		rzz	*B	rBFF2D48	8B	B		rzz	*B  R(FB	rBFF2D48	8B
2r
C
sA	RVVD\88BHHr"   c                   `    e Zd Z eg d      Z eg d      Zd Zed        Zed        Z	d Z
y)
TensorData)avgstdlowesthighesthist
hist_edgesbins)r3   r4   r5   r6   r8   c                    t        |j                               | _        |j                         D ]  \  }}|t        j
                  vr t        d|dt        j
                   d      |t        j                  v rmt        |d      st        dt        |       d|      |j                  t        j                  t        j                  fvrt        d|j                   d|      t        | ||        y )NzUnexpected value z not in .r   Unexpected type z for k=zUnexpected dtype )listkeys_attrsitemsr2   _allowed
ValueError_floatshasattrtyper   r   float16r+   setattr)selfkwargskvs       r    __init__zTensorData.__init__G   s    6;;=)LLNDAq
+++ #4QE*BUBUAVVW!XYYJ&&&q'*$'7Qyu%MNN772::rzz"::$'8	%NOOD!Q #r"   c                     t        | d      rt        | d      st        dt        |        d      | j                  | j                  fS )Nr5   r6   z0Attributes 'lowest' and/or 'highest' missing in r;   )rD   AttributeErrordirr5   r6   rH   s    r    range_valuezTensorData.range_valueS   sF    tX&gdI.F #STWX\T]S^^_!`aaT\\**r"   c                     t        | d      rt        | d      st        dt        |        d      | j                  | j                  fS )Nr3   r4   z)Attributes 'avg' and/or 'std' missing in r;   )rD   rN   rO   r3   r4   rP   s    r    avg_stdzTensorData.avg_stdY   sC    tU#74+? #LSQUYKWX!YZZ$((##r"   c                     | j                   D ci c]  }|t        | |       }}| j                  j                  |d<   |S c c}w )NCLS)r?   getattr	__class____name__)rH   rJ   datas      r    to_dictzTensorData.to_dict_   sC    -1[[9[74##[9nn--U :s   A N)rX   
__module____qualname__	frozensetrA   rC   rL   propertyrQ   rS   rZ    r"   r    r2   r2   C   sJ    Z[HIJG
  + +
 $ $
r"   r2   c                   \    e Zd Zdeeeeef   f   fdZd Z	d Z
d Zd Zd Zd Zd	 Zd
 Zy)TensorsDatarY   c           
      b   || _         i | _        |j                         D ]  \  }}t        |t              st        dt        |       d      t        |t              r|t        j                  k(  r/t        |      dk(  r!t        |d   |d         | j                  |<   t        |      dk(  r)t        |d   |d   |d   |d   	      | j                  |<   t        d
|ddt        |       d| d      t        |t              st        dt        |       d      || j                  |<    y )NzKeys must be strings not r;      r   r   r5   r6         )r5   r6   r7   r9   zUnexpected tuple for rz	, it has z elements: zValues must be TensorData not )calibration_methodrY   r@   
isinstancestr	TypeErrorrE   tupleCalibrationMethodMinMaxlenr2   )rH   rh   rY   rJ   rK   s        r    rL   zTensorsData.__init__g   s   "4	JJLDAqa%";DG9A FGG!U#%):)A)AAc!fPQk#-QqT1Q4#HDIIaLq6Q;#-QqT1Q4aPQdYZ[\Y]#^DIIaL"7!uIc!fX[YZX[[\ ]^^a,"@a	 KLLDIIaL !r"   c              #   8   K   | j                   E d {    y 7 wNrY   rP   s    r    __iter__zTensorsData.__iter__y   s     99s   c                     || j                   v S rq   rr   rH   keys     r    __contains__zTensorsData.__contains__|   s    diir"   c                      | j                   |   S rq   rr   ru   s     r    __getitem__zTensorsData.__getitem__   s    yy~r"   c                 \    || j                   vrt        d|d      || j                   |<   y )Nz)Only an existing tensor can be modified, z is not.)rY   RuntimeError)rH   rv   values      r    __setitem__zTensorsData.__setitem__   s1    dii!J3'QYZ[[		#r"   c                 6    | j                   j                         S rq   )rY   r>   rP   s    r    r>   zTensorsData.keys   s    yy~~r"   c                 6    | j                   j                         S rq   )rY   valuesrP   s    r    r   zTensorsData.values   s    yy!!r"   c                 6    | j                   j                         S rq   )rY   r@   rP   s    r    r@   zTensorsData.items   s    yy  r"   c                 b    | j                   j                  | j                  | j                  d}|S )N)rU   rY   rh   )rW   rX   rY   rh   )rH   rY   s     r    rZ   zTensorsData.to_dict   s/     >>**II"&"9"9

 r"   N)rX   r[   r\   r   rj   r	   r2   r   rL   rs   rw   ry   r}   r>   r   r@   rZ   r_   r"   r    ra   ra   f   sJ    c5UAR;S6S1T $ 
 "!r"   ra   c                       e Zd ZdZdZdZdZy)rm   r   r   rc   rf   N)rX   r[   r\   rn   Entropy
PercentileDistributionr_   r"   r    rm   rm      s    FGJLr"   rm   c                   h    e Zd Zed        Zej                  defd       Zd Z	d Z
d Zdedefd	Zy
)CalibrationDataReaderc                 X    t        |d      xr t        |j                        xs t        S )Nget_next)rD   callabler   NotImplemented)clssubclasss     r    __subclasshook__z&CalibrationDataReader.__subclasshook__   s%    x,L(:K:K1L^P^^r"   r   c                     t         )z9generate the input data dict for ONNXinferenceSession runNotImplementedErrorrP   s    r    r   zCalibrationDataReader.get_next   s
     "!r"   c                     | S rq   r_   rP   s    r    rs   zCalibrationDataReader.__iter__   s    r"   c                 6    | j                         }|t        |S rq   )r   StopIteration)rH   results     r    __next__zCalibrationDataReader.__next__   s    >r"   c                     t         rq   r   rP   s    r    __len__zCalibrationDataReader.__len__       !!r"   start_index	end_indexc                     t         rq   r   )rH   r   r   s      r    	set_rangezCalibrationDataReader.set_range   r   r"   N)rX   r[   r\   classmethodr   abcabstractmethoddictr   rs   r   r   intr   r_   r"   r    r   r      sY    _ _ 	"$ " """S "S "r"   r   )	metaclassc                       e Zd Z	 	 	 	 	 ddeeef   deee      fdZdgfdZ	d Z
defd	Zd
 Zd ZdefdZdefdZy)CalibraterBaseN
model_pathop_types_to_calibratec                 "   t        |t              rt        t        |            | _        n,t        |t              rt        |      | _        nt        d      || _        || _        || _        || _	        || _
        d| _        d| _        dg| _        y)a  
        :param model_path: ONNX model to calibrate. It should be a model file path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb.
        :param per_channel: whether to compute ranges per each channel.
        z model_path should be model path.NCPUExecutionProvider)ri   rj   r   r   modelrB   r   augmented_model_path	symmetricuse_external_data_formatper_channelaugment_modelinfer_sessionexecution_providers)rH   r   r   r   r   r   r   s          r    rL   zCalibraterBase.__init__   s    " j#&4T*5EFDJ
D)4Z@DJ?@@%:"$8!"(@%&!!$:#; r"   r   c                 2    || _         | j                          y)zz
        reset the execution providers to execute the collect_data. It triggers to re-creating inference session.
        N)r   create_inference_session)rH   r   s     r    set_execution_providersz&CalibraterBase.set_execution_providers   s     $7 %%'r"   c                     t        j                         }t         j                  j                  |_        t        j
                  | j                  || j                        | _        y)z9
        create an OnnxRuntime InferenceSession.
        )sess_options	providersN)	onnxruntimeSessionOptionsGraphOptimizationLevelORT_DISABLE_ALLgraph_optimization_levelInferenceSessionr   r   r   )rH   r   s     r    r   z'CalibraterBase.create_inference_session   sN     #1130;0R0R0b0b-(99%%%..
r"   r   c                    |j                   j                  D ci c]  }|j                  | }}|j                  |j                   j                  D ci c]  }|j                  | c}       |j                  |j                   j
                  D ci c]  }|j                  | c}       |j                   j                  D ch c]  }|j                   }}t               }t        j                  t        j                  h}	|j                   j                  D ]  }
| j                  r|
j                  | j                  v s(t        j                  |
j
                  |
j                        D ]a  }||v s||   }|j                   j#                  d      s)|j                   j$                  j&                  |	v sL||vsQ|j)                  |       c  ||fS c c}w c c}w c c}w c c}w )z
        select input/output tensors of candidate nodes to calibrate.
        returns:
            tensors (set): set of tensor name.
            value_infos (dict): tensor name to value info.
        tensor_type)graph
value_infonameupdateoutputinputinitializersetr   FLOATFLOAT16noder   op_type	itertoolschainrE   HasFieldr   	elem_typeadd)rH   r   vivalue_infosotitinitr   tensors_to_calibratetensor_type_to_calibrater   tensor_names               r    select_tensors_to_calibratez*CalibraterBase.select_tensors_to_calibrate   s    .3[[-C-CD-Crrww{-CD%++2D2DE2DBBGGRK2DEF%++2C2CD2CBBGGRK2CDE-2[[-D-DE-DTtyy-DE"u$/$5$5{7J7J#K KK$$D--A[A[1[#,??4::t{{#KK"k1(5GG,,];!#!4!4!>!>BZ!Z!,K!?044[A $L % $[00) EEDEs   GGGG#c                     | j                   S )zP
        return: augmented onnx model. Call after calling augment_graph
        )r   rP   s    r    get_augment_modelz CalibraterBase.get_augment_model  s     zzr"   c                     t         )z
        abstract method: augment the input model to prepare for collecting data. It will:
            1. augment the model to be able to collect desired statistics data
            2. save augmented model to augmented_model_paths
        r   rP   s    r    augment_graphzCalibraterBase.augment_graph  s
     "!r"   data_readerc                     t         )z
        abstract method: collect the tensors that will be used for range computation. It can be called multiple times.
        r   )rH   r   s     r    collect_datazCalibraterBase.collect_data  
     "!r"   r   c                     t         )ze
        abstract method: compute data based on the calibration method stored in TensorsData
        r   rP   s    r    compute_datazCalibraterBase.compute_data"  r   r"   )Naugmented_model.onnxFFF)rX   r[   r\   r	   rj   r   r   r   rL   r   r   r
   r   r   r   r   r   ra   r   r_   r"   r    r   r      s     :>3!& <#t)$ <  (6 <D <R:R (

1 1:""(= ""k "r"   r   c                   z     e Zd Z	 	 	 	 	 	 	 	 ddeeef   deee      f fdZd Z	d Z
defdZd Zd	efd
Z xZS )MinMaxCalibraterr   r   c
                    t         |   ||||||	       g | _        d| _        t	        | j
                  j                  j                        | _        | j
                  j                  j                  D 
ch c]  }
|
j                   c}
| _
        || _        |r|dk  s|dkD  rt        d      || _        || _        yc c}
w )aw  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param moving_average: compute the moving average of the minimum and maximum values instead of the global minimum and maximum.
        :param averaging_constant: constant smoothing factor to use when computing the moving average.
        :param max_intermediate_outputs: maximum number of intermediate outputs before an intermediate range is computed.
        :param per_channel: whether to compute ranges per each channel.
        )r   r   r   r   r   Nr   r   z;Invalid averaging constant, which should not be < 0 or > 1.)superrL   intermediate_outputscalibrate_tensors_rangero   r   r   r   num_model_outputsr   model_original_outputsmoving_averagerB   averaging_constantmax_intermediate_outputs)rH   r   r   r   r   r   r   r   r   r   r   rW   s              r    rL   zMinMaxCalibrater.__init__*  s    . 	"7!5%=# 	 	
 %'!'+$!$TZZ%5%5%<%<!=AEAQAQAXAX&YAXvv{{AX&Y#,1A59Ka9OZ[["4(@% 'Zs   5B=c                      j                   j                        \  }}t        t        j                               t        j                  t        j                  dgt        j                              } j                  j                  j                  j                  |       d  fd}|D ]  } ||d        ||d        t        j                   j                   j                   j                          y)	z
        Adds ReduceMin and ReduceMax nodes to all quantization_candidates op type nodes in
        model and ensures their outputs are stored as part of the graph output
        :return: augmented ONNX model
        r   c                     |j                   D ]:  }t        j                  j                  | |j                        s.|j
                  c S  t        d|  d      )Nz&Model does not contain a version for 'z'.)opset_importonnxdefshasdomainversionr{   )r   r   r   s      r    get_op_versionz6MinMaxCalibrater.augment_graph.<locals>.get_op_version^  sN     % 2 299==,*=*=>'/// !3 !GyPRSTTr"   c                 4   d}| dz   |z   }|dz   }t         j                  j                  || g|g||      }t         j                  j                  d|g|g|      }j                  j                  j
                  D ci c]  }|j                  | }}|j                  j                  j                  j                  D 	ci c]  }	|	j                  |	 c}	       |j                  j                  j                  j                  D 
ci c]  }
|
j                  |
 c}
       | |v r$||    j                  j                  j                  }nt        d| d      j                  r+t        ||    j                  j                  j                   j"                        }d	gt%        d
|      } |j                        dk  r0|j&                  j)                  t        j*                  d|             nt-        t/        j0                               }t3        j4                  t7        j8                  |t6        j:                        |      }|j                  j)                  |       j                  j                  j<                  j)                  |       j                  j                  j>                  jA                  ||g       j                  j                  j                  j)                  t        jB                  ||d g             y c c}w c c}	w c c}
w )Nr   __Reshape)r'   r   Reshape)inputsoutputsr   z'Unable to guess tensor type for tensor zE, running shape inference before quantization may resolve this issue.r   rc      axesr   )"r   r   	make_noder   r   r   r   r   r   r   rE   r   r   rB   r   ro   r   dimrange	attributeappendmake_attributerj   uuiduuid4r   
from_arrayr   arrayint64r   r   extendmake_tensor_value_info)r   reduce_op_namer'   reduce_outputintermediate_outputreduce_nodereshape_noder   r   oi	onnx_typetensor_rankreduced_axesreduce_axes_namereduce_axesr   reshape_shape_namerH   s                   r    add_reduce_min_maxz:MinMaxCalibrater.augment_graph.<locals>.add_reduce_min_maxd  s    H (#->M"/*"<++//0C/Dx^k 0 K  ;;00+-?@&(	 1 L 261A1A1L1LM1L2277B;1LKM4::3C3C3J3JK3Ja	3JKL4::3C3C3I3IJ3Ia	3IJKk)'499EEOO	 =k_ MZ [  !+k":"?"?"K"K"Q"Q"U"UV !:E![$9:!.$**=B))001F1Fv|1\]'*4::<'8$"."9"9"((<WYW_W_:`br"sK%%,,-=>JJ$$0077DJJ!!((+|)DEJJ##**6+H+HXadhci+jk3 NKJs   ?LL
L	ReduceMin	ReduceMaxsave_as_external_dataN)r   r   rj   r	  r
  r   r  r   r  r  r   r   r  r   saver   r   )rH   tensorsr   reshape_shaper  tensorr   r  s   `     @@r    r   zMinMaxCalibrater.augment_graphS  s     55djjA
 .$//"RXX0NPbc

$$++M:	U,	l\ Fv{3v{3  			JJ%%"&"?"?	
r"   c                     g | _         y rq   r   rP   s    r    clear_collected_dataz%MinMaxCalibrater.clear_collected_data  
    $&!r"   r   c                    	 |j                         }|snt| j                  j                  | j                  j	                  d |             | j
                  2t        | j                        | j
                  k(  r| j                          t        | j                        dk(  r| j                  t        d      | j                         }t        |t              st        dt        |       d      | j                          y )Nr   No data is collected.z+compute_data must return a TensorsData not r;   )r   r   r  r   runr   ro   r(  r   rB   r   ri   ra   rk   rE   )rH   r   r   ts       r    r   zMinMaxCalibrater.collect_data  s     ))+F%%,,T-?-?-C-CD&-QR--9112d6S6SS))+  t(()Q.43O3O3W455![)I$q'RSTUU!!#r"   c                 >   |s|S |j                         D ]  \  }}t        |t              r|j                  d   }|j                  d   }n|\  }}t        ||   t              r%||   j                  d   }||   j                  d   }n||   \  }}| j                  r+|| j
                  ||z
  z  z   }	|| j
                  ||z
  z  z   }
nt        ||      }	t        ||      }
t        |t              st        ||   t              rt        |	|
      ||<   |	|
f||<    |S )Nr   r   rd   )r@   ri   r2   rQ   r   r   minmax)rH   	old_range	new_rangerv   r|   old_minold_maxnew_minnew_max	min_value	max_values              r    merge_rangezMinMaxCalibrater.merge_range  s,   #//+JC%,++A.++A.#( )C.*5#C.44Q7#C.44Q7#,S> ""#d&=&=7AR&SS	#d&=&=7AR&SS	1	1	 %,
9S>:0V!+9i!P	#"+Y!7	#3 ,6 r"   r   c           	         t        | j                        dk(  r| j                  S t        t        | j                  d               D cg c])  }| j                  j                         |   j                  + }}| j                  D cg c]  }t        t        ||             }}i }|D ];  }|j                         D ]&  \  }}|j                  |g       j                  |       ( = || j                  d }	t        dt        |	      d      D cg c]  }|	|   j                  d      d    }
}|D ci c]  }|| j                  vs|||    }}g }t        dt        |	      d      D ]  }| j                  r>t!        j"                  ||	|      d      }t!        j"                  ||	|dz         d      }n=t!        j$                  ||	|      d      }t!        j&                  ||	|dz         d      }| j(                  r]t!        j&                  t!        j*                  |      t!        j*                  |      gd      }|j                  t-        | |g             |j                  t-        ||g              t/        t0        j2                  t        t        |
|                  }| j                  r-| j5                  | j                  |      | _        | j                  S || _        | j                  S c c}w c c}w c c}w c c}w )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {added node names: (ReduceMin, ReduceMax) pairs }
        r   Nrc   r   r(   r   )ro   r   r   r  r   get_outputsr   r   zipr@   
setdefaultr  r   
rpartitionr   r   r   meanr/  r0  r   absrl   ra   rm   rn   r9  )rH   r  output_namesr  output_dicts_listmerged_output_dictdrJ   rK   added_output_namescalibrate_tensor_namesmerged_added_output_dictpairsmin_value_arraymax_value_arraymax_absolute_valuenew_calibrate_tensors_ranges                    r    r   zMinMaxCalibrater.compute_data  s    t(()Q.///JOPSTXTmTmnoTpPqJrsJrQ**668;@@JrsTXTmTm
Tm=PD\#678Tm 	 
  "A	1"--a4;;A> " # *$*@*@*BC>CAsK]G^`a>b"
>bq!,,S1!4>b 	 "

 /A$
.@ATMhMhDhA!!$$.@ 	! $
 q#0115A"""$''*BCUVWCX*Y`a"b"$''*BCUVWZ[V[C\*]de"f"$&&)ABTUVBW)X_`"a"$&&)ABTUVYZUZB[)\cd"e~~%'VVRVVO-Dbff_F],^ef%g"U%7$79K#LMNUO_#EFG 6 '22C2J2JDQTUkmrQsLt&u#''+/+;+;D<X<XZu+vD( +++ ,GD(+++O t
"
$
s   .K3K8K=1LL)Nr   FFF{Gz?NF)rX   r[   r\   r	   rj   r   r   r   rL   r   r(  r   r   r9  ra   r   __classcell__rW   s   @r    r   r   )  su     :>3!&!%'A#t)$'A  (6'ARG
R'$(= $(B0,k 0,r"   r   c                   v     e Zd Z	 	 	 	 	 	 	 	 	 d
deeef   deee      f fdZd Z	d Z
defdZdefd	Z xZS )HistogramCalibraterr   r   c                    t         |   |||||       g | _        d| _        t	        | j
                  j                  j                        | _        | j
                  j                  j                  D ch c]  }|j                   c}| _
        d| _        || _        || _        || _        |	| _        d| _        |
| _        yc c}w )a=  
        :param model_path: ONNX model to calibrate. It is a model path.
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        :param scenario: see :class:`DistributionCalibrater`
        )r   r   r   r   N)r   rL   r   r   ro   r   r   r   r   r   r   	collectormethodnum_binsnum_quantized_bins
percentiler   scenario)rH   r   r   r   r   rT  r   rU  rV  rW  rX  r   rW   s               r    rL   zHistogramCalibrater.__init__  s    2 	"7!5%= 	 	
 %'!'+$!$TZZ%5%5%<%<!=AEAQAQAXAX&YAXvv{{AX&Y# "4$$(!  'Zs   4Cc                 Z   | j                  | j                        \  | _        }| j                  D ]C  }|| j                  vs| j                  j                  j
                  j                  ||          E t        j                  | j                  | j                  | j                         y)z
        make all quantization_candidates op type nodes as part of the graph output.
        :return: augmented ONNX model
        r   N)r   r   r   r   r   r   r  r   r"  r   r   )rH   r   r%  s      r    r   z!HistogramCalibrater.augment_graph4  s    
 261Q1QRVR\R\1].!;//FT888

  ''..{6/BC 0 			JJ%%"&"?"?	
r"   c                     g | _         y rq   r'  rP   s    r    r(  z(HistogramCalibrater.clear_collected_dataD  r)  r"   r   c           	         | j                   j                         D ch c]  }|j                   }}| j                   j                         D cg c]  }|j                   }}	 |j	                         }|sn| j                   j                  d|      }g }t        |      D ]B  \  }}	||   |v r%|j                  t        j                  |	             2|j                  |	       D | j                  j                  |       t        | j                        dk(  rt        d      | j                  D 
cg c]  }
t        t        ||
             }}
i }|D ];  }|j                         D ]&  \  }}|j                  |g       j                  |       ( = |D ci c]  }|| j                   v s|||    }}| j"                  sRt%        | j&                  | j(                  | j*                  | j,                  | j.                  | j0                        | _        | j"                  j3                  |       | j5                          yc c}w c c}w c c}
w c c}w )zy
        Entropy Calibrator collects operators' tensors as well as generates tensor histogram for each operator.
        Nr   r+  )rT  r   rU  rV  rW  rX  )r   
get_inputsr   r;  r   r,  	enumerater  copyr   ro   rB   r   r<  r@   r=  r   rS  HistogramCollectorrT  r   rU  rV  rW  rX  collectr(  )rH   r   node_arginput_names_setrA  r   r   fixed_outputsoutput_indexr   r  rB  merged_dictrD  rJ   rK   r  clean_merged_dicts                     r    r   z HistogramCalibrater.collect_dataG  s&    :>9K9K9V9V9XY9XX8==9XY6:6H6H6T6T6VW6V(6VW ))+F((,,T6:G M(1'(:$f-@!((6):;!((0	 ); %%,,]; " t(()Q.455 UYTmTm
Tm=PD\#678Tm 	 
 "A	1&&q"-44Q7 " # 9Df1qDLeLeGeQA.f~~/{{..#'#:#:??DN 	01!!#[ ZW,
 gs   H>I2II*Ir   c                 n   | j                   st        d      t        | t              rt        j
                  }nZt        | t              rt        j                  }n9t        | t              rt        j                  }nt        dt        |        d      t        || j                   j                               S )z
        Compute the min-max range of tensor
        :return: dictionary mapping: {tensor name: (min value, max value)}
        z9No collector created and can't generate calibration data.zUnknown calibrater z". This method must be overwritten.)rS  rB   ri   EntropyCalibraterrm   r   PercentileCalibraterr   DistributionCalibraterr   rk   rE   ra   compute_collection_result)rH   cals     r    r   z HistogramCalibrater.compute_dataz  s    
 ~~XYYd-.#++C23#..C45#00C1$t*=_`aa3 H H JKKr"   )	Nr   FrW  F      -X@same)rX   r[   r\   r	   rj   r   r   r   rL   r   r(  r   r   ra   r   rN  rO  s   @r    rQ  rQ    sp     :>3!&*!#t)$*!  (6*!X
 '1$(= 1$fLk Lr"   rQ  c                   N     e Zd Z	 	 	 	 	 	 	 ddeeef   deee      f fdZ xZ	S )rh  r   r   c	           
      4    t         	|   ||||||||       y)a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param num_quantized_bins: number of quantized bins. Default 128.
        )rT  r   rU  rV  Nr   rL   )
rH   r   r   r   r   rT  r   rU  rV  rW   s
            r    rL   zEntropyCalibrater.__init__  s/    * 	! $1 	 		
r"   )Nr   Fr0   Frm  rm  
rX   r[   r\   r	   rj   r   r   r   rL   rN  rO  s   @r    rh  rh    sH     :>3!&
#t)$
  (6
 
r"   rh  c                   N     e Zd Z	 	 	 	 	 	 	 ddeeef   deee      f fdZ xZ	S )ri  r   r   c	           
      4    t         	|   ||||||||       y)a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_quantized_bins: number of quantized bins. Default 128.
        :param percentile: A float number between [0, 100]. Default 99.99.
        )rT  r   rU  rW  Nrs  )
rH   r   r   r   r   rT  r   rU  rW  rW   s
            r    rL   zPercentileCalibrater.__init__  s/    * 	! $! 	 		
r"   )Nr   FrW  Frn  ro  rt  rO  s   @r    ri  ri    sH     :>3!&
#t)$
  (6
 
r"   ri  c                   L     e Zd Z	 	 	 	 	 	 ddeeef   deee      f fdZ xZ	S )rj  r   r   c           	      2    t         |   |||||||       y)a  
        :param model_path: ONNX model to calibrate. It is a model path
        :param op_types_to_calibrate: operator types to calibrate. By default, calibrate all the float32/float16 tensors.
        :param augmented_model_path: save augmented model to this path.
        :param use_external_data_format: use external data format to store model which size is >= 2Gb
        :param method: A string. One of ['entropy', 'percentile', 'distribution'].
        :param symmetric: make range of tensor symmetric (central point is 0).
        :param num_bins: number of bins to create a new histogram for collecting tensor values.
        :param scenario: for float 8 only, if `scenario="same"`,
            the algorithm weights and float 8 follow the same distribution,
            if `scenario="p3"`, it assumes the weights follow
            a gaussian law and float 8 ~ X^3 where X is a gaussian law
        )rT  rU  rX  Nrs  )	rH   r   r   r   r   rT  rU  rX  rW   s	           r    rL   zDistributionCalibrater.__init__  s,    . 	! $ 	 	
r"   )Nr   Fdistributionrm  rp  rt  rO  s   @r    rj  rj    sE     :>3!&
#t)$
  (6
 
r"   rj  c                   X    e Zd ZdZej
                  d        Zej
                  d        Zy)CalibrationDataCollectorzL
    Base class for collecting data for calibration-based quantization.
    c                     t         )z
        Generate informative data based on given data.
            name_to_arr : dict
                tensor name to NDArray data
        r   rH   name_to_arrs     r    r`  z CalibrationDataCollector.collect  s
     "!r"   c                     t         )z?
        Get the optimal result among collection data.
        r   rP   s    r    rk  z2CalibrationDataCollector.compute_collection_result  s
    
 "!r"   N)rX   r[   r\   __doc__r   r   r`  rk  r_   r"   r    r{  r{    s;     	" " 	" "r"   r{  c                   d    e Zd ZdZd Zd Zd Zd Zd Zd Z	d Z
d	 Zd
 Zedd       Zd Zd Zy)r_  a`  
    Collecting histogram for each tensor. Percentile and Entropy method are supported.

    ref: https://github.com//apache/incubator-mxnet/blob/master/python/mxnet/contrib/quantization.py
    ref: https://docs.nvidia.com/deeplearning/tensorrt/pytorch-quantization-toolkit/docs/_modules/
                 pytorch_quantization/calib/histogram.html
    c                 f    i | _         || _        || _        || _        || _        || _        || _        y rq   )histogram_dictrT  r   rU  rV  rW  rX  )rH   rT  r   rU  rV  rW  rX  s          r    rL   zHistogramCollector.__init__  s5     " "4$ r"   c                     | j                   S rq   )r  rP   s    r    get_histogram_dictz%HistogramCollector.get_histogram_dict  s    """r"   c                     t        d       | j                  dv r| j                  |      S | j                  dk(  r.| j                  r| j	                  |      S | j                  |      S t        d      )Nz/Collecting tensor data and making histogram ...>   r0   ry  rW  DOnly 'entropy', 'percentile' or 'distribution' methods are supported)printrT  collect_valuer   collect_absolute_valuerB   r}  s     r    r`  zHistogramCollector.collect  sl    ?@ ;;55%%k22[[L(~~22;??))+66cddr"   c                    |j                         D ]D  \  }}t        |t              rz|D ]2  }t        |t        j                        rJ dt        |       d|        t        d |D              }t        |      dk(  sJ d| d|       t        j                  |      }n6t        |t        j                        st        dt        |       d|      |}|j                         }|j                  dkD  r+t        j                  |      }t        j                  |      }nBt        j                  d|j                        }t        j                  d|j                        }t        j                   |      }|| j"                  vrxt        j$                  || j&                        \  }	}
|
j)                  |j                        }
|j                  t        j*                  k7  sJ d	       |	|
||f| j"                  |<   | j"                  |   }|d
   }|d   }t-        |d      sJ dt        |              t-        |d      sJ dt        |              |d   }|d   }t        j                  |      }||d   kD  rB|d   |d   z
  }t        j.                  |d   |z   ||z   |      }t        j0                  ||f      }t        j$                  ||      \  }	}
|
j)                  |j                        }
|	dt        |      xxx |z  ccc |j                  t        j*                  k7  sJ d	       |	|
t        ||      t        ||      f| j"                  |<   G y)z5
        Collect histogram on absolute value
        r<   z for tensor=c              3   4   K   | ]  }|j                     y wrq   r   ).0as     r    	<genexpr>z<HistogramCollector.collect_absolute_value.<locals>.<genexpr>6  s     7hQWWhs   r   z6The calibration expects only one element type but got r   r   )r9   zMonly float32 or float16 is supported, every constant must be explicitly typedrc   rf   r   z'old_min should be a numpy array but is r   N)r@   ri   r=   r   ndarrayrE   r   ro   r)   rB   flattensizer/  r0  r  r   absoluter  	histogramrU  r*   float64rD   arangehstack)rH   r~  r%  data_arrarrdtypesdata_arr_npr7  r8  r7   r8   old_histogramr3  r4  old_histold_hist_edges	temp_amaxwidthnew_bin_edgess                      r    r  z)HistogramCollector.collect_absolute_value.  s@    !, 1 1 3FH(D)#C%c2::6l:J4PS9+Uabhak8ll6 $7h77K1$kKF8S_`f_ijk$ jj2"**5 #3DN3C<PVz!Z[[&%--/K!#FF;/	FF;/	HHQk.?.?@	HHQk.?.?@	++k2KT000#%<<$--#P j'..{/@/@A
%%3cbc3/3ZI.V##F+ $ 3 3F ;'*'*w0k4[\`ah\i[j2kk0w0k4[\`ah\i[j2kk0(+!.q!1FF;/	~b11*1-q0AAE$&IInR.@5.H)V[J[]b$cM%'YY/N%ON#%<<.#Q j'..{/@/@A
_s8}%1%%%3cbc3/3ZWiAXZ]^egpZq.r##F+i !4r"   c           	         |j                         D ]a  \  }}t        j                  |      }|j                         }|j                  dkD  r+t        j
                  |      }t        j                  |      }nBt        j                  d|j                        }t        j                  d|j                        }t        j                  t        t        |      t        |            |j                        }|| j                  v r3| j                  |   }| j                  |||||      | j                  |<   &t        j                  || j                  | |f      \  }}	||	|||f| j                  |<   d y)z1
        Collect histogram on real value
        r   r   r  N)r@   r   r)   r  r  r/  r0  r  r   r@  r  merge_histogramr  rU  )
rH   r~  r%  r  r7  r8  	thresholdr  r7   r8   s
             r    r  z HistogramCollector.collect_valueh  s7    !, 1 1 3FHzz(+H'')H}}q FF8,	FF8,	HHQhnn=	HHQhnn=	S^S^!DHNN[I,,, $ 3 3F ;.2.B.B!8Y	9/##F+ $&<<$--QZPZ\eOf#g j/##F+) !4r"   c                    |\  }}}}	}
||
k  rEt        j                  |t        |      |
 |
f      \  }}||z   |t        ||      t	        |	|      |
fS |
dk(  r-t        j                  |t        |      | |f      \  }}||z  }nft        |      }d|
z  |z  }t        ||
z
  |z  dz         }|d|z  z   }||z  |
z   }t        j                  ||| |f      \  }}||||z
  xxx |z  ccc ||t        ||      t	        |	|      |fS )Nr  r   rc   r   )r   r  ro   r/  r0  r   )rH   r  r  r5  r6  new_thresholdr  r  r3  r4  old_thresholdnew_histr   r7   r8   old_num_bins
old_stridehalf_increased_binsnew_num_binss                      r    r  z"HistogramCollector.merge_histogram  sT   FSC>7G]M),,xX~WdFefKHa8#GW%GW%  !#%<<#h-Q^P^`mOn#o j "8}.=
&)==+HZ*WZ[*[&\#+a2E.EE 3j @= P#%<<,P]~_lNm#n j(<:M+MNRZZNGW%GW% r"   c                 b   | j                   rt        | j                         dk(  rt        d      t        d| j                  d       | j                  dk(  r| j                         S | j                  dk(  r| j                         S | j                  dk(  r| j                         S t        d      )	Nr   z=Histogram has not been collected. Please run collect() first.z0Finding optimal threshold for each tensor using z algorithm ...r0   rW  ry  r  )r  ro   rB   r  rT  compute_entropycompute_percentilecompute_distributionrP   s    r    rk  z,HistogramCollector.compute_collection_result  s    ""c$*=*=&>!&C\]]@~^_;;)#''))[[L(**,,[[N*,,..cddr"   c                    | j                   dk  s| j                   dkD  rt        d      | j                  }| j                   }i }t        dt	        |              t        d| j
                          t        dd|z
   d| d	       |j                         D ]  \  }}|d   }|d
   }|j                         }t        j                  ||z        }	| j                  rft        j                  |	|dz        }
t        j                  ||
   |j                         t        j                  ||
   |j                        f||<   nd|z
  dz  }t        j                  |	d|z
        }
t        j                  |	|      }t        j                  ||   |j                        t        j                  ||
   |j                        f||<   |d   }|d   }||   d   |k  r|||   d
   f||<   ||   d
   |kD  r||   d   |f||<   g ||   |d d ||<   t        j                  j!                  dd      dv st#        ||        |S )Nr   d   z<Invalid percentile. Must be in range 0 <= percentile <= 100.Number of tensors : Number of histogram bins : zPercentile : (g      Y@,)r   r   g      i@r&   rc   rf   QUANTIZATION_DEBUGr   1)rW  rB   r  r  ro   rU  r@   r,   r   cumsumr   searchsortedr  r   osenvirongetr   )rH   r  rW  thresholds_dictr%  r  r7   r8   totalcdf	idx_rightpercent_to_cut_one_sideidx_leftr7  r8  s                  r    r  z%HistogramCollector.compute_percentile  sU   ??Q$//C"7[\\,,__
$S%8$9:;+DMM?;<uz12!J<qAB!/!5!5!7FIQ<D"1JHHJE))D5L)C~~OOCe1CD	 XXj3:;K;KLLHHZ	2*:J:JK+'
 ,1:+=*F'OOC7N1NO	??30GHHHZ19I9IJHHZ	2*:J:JK+' "!I!!Iv&q)I5+4of6Ma6P*Q'v&q)I5+:6+B1+Ey*Q'&K(?&K$r(&KOF#zz~~2A6(B4,; "8> r"   c                    | j                   }| j                  }i }t        dt        |              t        d| j                   d       t        d| j                          |j                         D ]^  \  }}| j                  ||      }|||<   g ||d d ||<   t        j                  j                  dd      dv sMt        |d   |d	          ` |S )
Nr  r  z: (The number may increase depends on the data it collects)zNumber of quantized bins : rc   r  r   r  r   )r  rV  r  ro   rU  r@   get_entropy_thresholdr  r  r  r   )rH   r  rV  r  r%  r  optimal_thresholds          r    r  z"HistogramCollector.compute_entropy  s    ,,!44$S%8$9:;+DMM?:tuv+D,C,C+DEF!/!5!5!7FI $ : :9FX Y&7OF#&J(9&JIbqM&JOF# zz~~2A6(B9Q<16 "8 r"   c                    |dk  rt        d| d      |d d |dd  z   dz  }|dk(  r| |z  j                         | j                         z  }| |dz  z  j                         | j                         z  |dz  z
  dz  }t        j                  ||j                        t        j                  ||j                        fS t        |      |k(  rt        |      dz  dk(  r| ||z  z  j                         | j                         z  }| ||z  |z
  dz  z  j                         | j                         z  dz  }t        j                  ||j                        t        j                  ||j                        fS t        j                  |      |z  }d|t        j                  |      <   d|t        j                  |      <   t        j                  |      |z  |z  }| |z  j                         | j                         z  }| |dz  z  j                         | j                         z  |dz  z
  dz  }t        j                  ||j                        t        j                  ||j                        fS )	Nr   zpower=z <= 0 is invalid.r   r   g      ?rc   r   )	rB   r,   r   r  r   r   r@  isnanisinf)r7   r8   powerr   r3   r4   facts          r    _avg_stdzHistogramCollector._avg_std  s   A:veW,=>??Sb/JqrN2c9A:&=%%'$((*4C619$))+dhhj836AcIC88Cz'7'78"((3jN^N^:___u:3u:>Q#6&%-',,.;CFEMC/A55::<txxzIcQC88Cz'7'78"((3jN^N^:___vvf~& RXXd^ RXXd^5(4/f}!!#dhhj0vqy %%'$((*4sAv=#Exx:#3#34bhhs*JZJZ6[[[r"   c           
         | j                   dk  rt        d      | j                  }i }t        dt	        |              t        d| j                           t        d| j
                  d       |j                         D ]E  \  }}|d   }|d   }|j                  t        j                  k7  sJ | j
                  d	k(  r| j                  ||d
      \  }}n2| j
                  dk(  r| j                  ||d
      \  }}nt        d      |j                  t        j                  k7  sJ |j                  t        j                  k7  sJ |j                  t        j                  k7  sJ t        |||||j                         |j                               ||<   t        j                  j!                  dd      dv s:t#        ||       H |S )Ni   z3Invalid num_bins. Must be in range 512 <= num_bins.r  r  zScenario : r  r   r   rp  )r  p3gUUUUUU?z,Invalid scenario. Must be in {'same', 'p3'}.)r3   r4   r7   r8   r5   r6   r  r  )rU  rB   r  r  ro   rX  r@   r   r   r  r  r2   r/  r0  r  r  r  r   )	rH   r  r  r%  r  r7   r8   avg_coefstd_coefs	            r    r  z'HistogramCollector.compute_distribution  s   ==3RSS,,$S%8$9:;+DMM?;<DMM,A./!/!5!5!7FIQ<D"1J##rzz111}}&%)]]41]%M"($&%)]]49]%U"( !OPP>>RZZ///>>RZZ///##rzz111&0%!~~'"('OF# zz~~2A6(B4,3 "86 r"   c           	         |d   }|d   }|j                   }|dz  }|dz  }|d   j                  }t        j                  ||z
  dz         }	t	        |	j                         D 
cg c]0  }
t        j
                  d|      t        j
                  d|      f2 }}
t	        ||dz   d      D ]  }
||
z
  }t        ||
z   dz   |      }||   ||   f||
|z
  <   t        j                  |||       }|j                         }t        |d|       }t        ||d       }|dxx   |z  cc<   |dxx   |z  cc<   |dk7  j                  t        j                        }t        j                  |t        j                        }|j                   |z  }t	        |      D ]  }||z  }||z   }t        |||       ||<    |dxx   t        |||z  d       z  cc<   t        j                  |j                   t        j                        }t	        |      D ]+  }||z  }||z   }t        |||       }|dk7  s!||   |z  ||| - t        |      }t        |      }||&t        j
                  t        j                  |      }n!t        j
                  t        ||      |      }||	|
|z
  <    t        j                  |	      }||   }|d   }|d   }|d   |k  r||d   f}|d   |kD  r|d   |f}t!        |d   d      sJ t!        |d   d      sJ |S c c}
w )	aF  Given a dataset, find the optimal threshold for quantizing it.
        The reference distribution is `q`, and the candidate distribution is `p`.
        `q` is a truncated version of the original distribution.
        Ref: http://on-demand.gputechconf.com/gtc/2017/presentation/s7310-8-bit-inference-with-tensorrt.pdf
        r   r   rc   r   Nr   rf   r   )r  r   r   zerosr  r  r/  r^  deepcopyr,   r*   r  r   r   r0   argminrD   )rH   r  rV  r7   r8   rU  zero_bin_indexnum_half_quantized_binr   kl_divergencer  
thresholdsr   r   sliced_distributionpleft_outliers_countright_outliers_countnonzerosquantized_binsnum_merged_binsindexstartendqnormdivmin_kl_divergence_idxr  r7  r8  s                                  r    r  z(HistogramCollector.get_entropy_threshold7  sh    |q\
99!Q!3q!8!""2H!H1!LMTYZgZlZlTmnTmqrxx/!51IJTm
n  -~/A1EA(1,KNQ.2H=I6@6MzZcOd5eJq112"&--[0K"L $((*A"%d<K&8"9#&tIJ'7#8 aD''DbE))E Qrxx0H  XX&8IN166:LLO 12/o-(+,?c,J(Ku% 3 2#&9:L:^:`&a"bb rxx0A12/o-8E#./19#1%#84#?AeCL 3 $A&A#A&AyAIhhrvvU3hhwq!}E:8;M!445] F` !#		- 8&'<=aL	aL	Q)+!*,=a,@ AQ)+!21!5y A(+W555(+W555  U os   "5L	N)r   )rX   r[   r\   r  rL   r  r`  r  r  r  rk  r  r  staticmethodr  r  r  r_   r"   r    r_  r_  
  s]    !#e8st@@e,\* \ \*&PX!r"   r_  r   Fr   r   c                 p   d }|t         j                  k(  rp|j                  dd      }|j                  dd      }|j                  dd      }	|j                  dd       }
|j                  dd      }t        | ||||||	|
|	      }n |t         j                  k(  rI|j                  d	d
      }|j                  dd
      }|j                  dd      }t        | ||||||      }n|t         j                  k(  rI|j                  d	d      }|j                  dd      }|j                  dd      }t        | ||||||      }nH|t         j                  k(  r5|j                  d	d      }|j                  dd      }t        | |||||      }|r"|j                          |j                          |S t        d|       )Nr   Fr   r   rM  r   r   )r   r   r   r   r   r   rU  rm  rV  )r   r   rU  rV  rn  rW  ro  T)r   r   rU  rW  rX  rp  )r   rU  rX  zUnsupported calibration method )rm   rn   r  r   r   rh  r   ri  r   rj  r   r   rB   )r   r   r   calibrate_methodr   extra_options
calibratorr   r   r   r   r   rU  rV  rW  rX  s                   r    create_calibratorr    s    J,333!%%k59	&**+;UC*../CTJ#0#4#45OQU#V #''u=%! %=)1%=#


 
.66	6 $$Z5*../CSI!%%k59	&! %=1

 
.99	9 $$Z6"&&|V<
!%%k48	)! %=!

 
.;;	; $$Z6 $$Z8+! %=

   "++-
67G6HI
JJr"   )Nr   )0r   r^  r   r  r	  enumr   pathlibr   typingr   r   r   r   r	   numpyr   r   r
   r   r   r   r   quant_utilsr   r   r   r  r!   floatr   r0   r2   ra   rm   ABCMetar   r   r   rQ  rh  ri  rj  r{  r_  rn   rj   r  r_   r"   r    <module>r     s      	    9 9   > >  U U  

 " !	





 5/ 	
 ZZ8   F1 1h "ckk "4k" k"\[,~ [,|CL. CLL
+ 
D
. 
D 
0  
F" ",E!1 E!T 6:/&--"KKdKK#HSM2KKr"   