
    gj                         d dl Z d dlmZmZ d dlZd dlZd dlZ	 d dlm	Z	 ddlmZ ddlmZ ddlmZmZmZmZmZmZmZmZmZmZ ddlmZ  G d	 d
      Z G d d      Zy# e
$ r dZ	Y Hw xY w)    N)AnyDict)to_array_extended   )
TensorData)	ONNXModel)
ONNX_TYPE_TO_NP_TYPETENSOR_NAME_QUANT_SUFFIXfind_by_namemodel_has_infer_metadatanormalize_axispack_bytes_to_4bitquantize_dataquantize_nparray&save_and_reload_model_with_shape_infertensor_proto_to_array)TensorQuantOverridesHelperc                   B    e Zd Zdeeef   fdZd	dZd Zd Z	d Z
d Zy)
QuantizationParamsdatac                 :   i | _         |j                         D ]   \  }}t        |t              st	        dt        |       d|d      |dk7  r@t        |t        t        t        j                  f      st	        dt        |       d|d      |dk(  r*t        |t              s|t	        dt        |       d      |dk(  rG|j                  t        j                  t        j                  fvrt        d|j                   d|      || j                   |<    y )	NzKeys must be strings not z for k=.axisz1Values must be numpy arrays, int, float, str not z'Axis value must be an int or None, not scalez5scale must a float32 or float16 numpy element but is )r   items
isinstancestr	TypeErrortypeintnpndarraydtypefloat32float16
ValueError)selfr   kvs       \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/base_quantizer.py__init__zQuantizationParams.__init__%   s   	JJLDAqa%";DG9GA5PQ RSSF{:a#sBJJ1G#H"STXYZT[S\\cdechhi jkkF{:a#5!-"I$q'RS TUUG|

BJJ/G G #XYZY`Y`Xaahijhm!nooDIIaL !    Nc                 :    | j                   j                  ||      S N)r   get)r'   keydefault_values      r*   r/   zQuantizationParams.get2   s    yy}}S-00r,   c              #   8   K   | j                   E d {    y 7 wr.   r   r'   s    r*   __iter__zQuantizationParams.__iter__5   s     99s   c                      | j                   |   S r.   r3   )r'   r0   s     r*   __getitem__zQuantizationParams.__getitem__8   s    yy~r,   c                 "    || j                   |<   y r.   r3   )r'   r0   values      r*   __setitem__zQuantizationParams.__setitem__;   s    		#r,   c                 ,    t        | j                        S r.   )lenr   r4   s    r*   __len__zQuantizationParams.__len__>   s    499~r,   r.   )__name__
__module____qualname__r   r   r   r+   r/   r5   r7   r:   r=    r,   r*   r   r   $   s/    tCH~ 1r,   r   c                       e Zd Z	 ddZdej
                  j                  defdZd Z	d Z
d Zd	 Zd
 Zd ZddZddZ	 	 ddZd Zy)BaseQuantizerNc                    t        |      st        |      }|j                  j                  D ci c]  }|j                  | c}| _        | j
                  j                  |j                  j                  D ci c]  }|j                  | c}       | j
                  j                  |j                  j                  D ci c]  }|j                  | c}       t        |      | _
        || _        || _        |
r|
ni | _        d| j                  v xr | j                  d   | _        d | _        d| j                  v xr | j                  d   | _        | j                  j#                  dd       | _        | j                  j#                  dd      | _        | j                  j#                  d      | _        t+        |d|      | _        t+        |d|      | _        	 |Qt1        t3        d |j5                                     r-t7        d	t9        d
 |j5                         D               d      || _        || _        || _        |	| _         | jC                         | _"        tG        | j                  j#                  di             | _$        | j                  jK                         D ci c]  }|j                  | c}| _&        | jH                  jO                  | jL                  | j
                  jQ                         |      \  }}|stS        |      | jH                  jU                         | _+        y c c}w c c}w c c}w c c}w )NEnableSubgraphForceQuantizeNoInputCheckWeightSymmetricActivationSymmetricFMinimumRealRangetensor_typec                 $    t        | t               S r.   )r   r   )ts    r*   <lambda>z(BaseQuantizer.__init__.<locals>.<lambda>w   s    z!Z?X;Xr,   z(tensors_range contains unexpected types c              3   2   K   | ]  }t        |        y wr.   )r   ).0r)   s     r*   	<genexpr>z)BaseQuantizer.__init__.<locals>.<genexpr>y   s     >gPf1tAwPfs   z, not TensorData.TensorQuantOverrides),r   r   graph
value_infonamevalue_infosupdateoutputinputr   modelper_channelreduce_rangeextra_optionsenable_subgraph_quantizationparentforce_quantize_no_input_checkr/   _is_weight_symmetricis_activation_symmetricmin_real_rangegetattractivation_qTypeweight_qTypeanymapvaluesr   settensors_rangenodes_to_quantizenodes_to_excludeop_types_to_quantizecheck_opset_versionopset_versionr   tensor_quant_overridesinitializerinitializersis_validkeysr&   get_quant_typestensor_quant_override_qtypes)r'   rY   rZ   r[   re   rd   rj   rk   rl   rm   r\   viotitinitzeroverrides_validoverrides_errs                    r*   r+   zBaseQuantizer.__init__C   s    (.:5AE27++2H2HI2HBBGGRK2HIu{{7I7I J7I"7I JKu{{7H7H I7H"7H IJu%
&(.;] 2 22[t7I7IJZ7[ 	) '4+=+==q$BTBTUpBq 	* 261C1C1G1GHY[_1`!'+'9'9'='=>SUZ'[$"00445GH '(8-IY Z#L-N
	 $S1XZgZnZnZp-q)r:3>gP]PdPdPf>g;g:hhyz  +!2 0$8!!557 'AASASAWAWXnprAs&t#BF**BXBXBZ[BZwW\\72BZ[)-)D)D)M)Mt//4468H*
& ]++,0,G,G,W,W,Y)u J J Ib \s   L/8L4 L9$L>weight_quant_typereturnc                    | j                   | j                   S |t        j                  j                  t        j                  j                  t        j                  j
                  t        j                  j                  fv S r.   )r`   onnxTensorProtoINT4INT8INT16FLOAT8E4M3FN)r'   r}   s     r*   is_weight_symmetricz!BaseQuantizer.is_weight_symmetric   sh    $$0,,, !!!!""))	%
 
 	
r,   c                     t         r.   )NotImplementedErrorr4   s    r*   quantize_modelzBaseQuantizer.quantize_model   s    !!r,   c                 R    t        || j                  j                               }|d uS r.   )r   rY   rq   )r'   
input_namerq   s      r*   is_input_a_initializerz$BaseQuantizer.is_input_a_initializer   s&    ":tzz/E/E/GH$&&r,   c                     | j                   S r.   )rZ   r4   s    r*   is_per_channelzBaseQuantizer.is_per_channel   s    r,   c                 6   t        || j                  j                               }|@|j                  t        j
                  j                  t        j
                  j                  fv S | j                  r| j                  y| j                  j                  |      S )NF)r   rY   rq   	data_typer   r   FLOATFLOAT16r]   r^   is_valid_quantize_weight)r'   weight_nameweights      r*   r   z&BaseQuantizer.is_valid_quantize_weight   sy    k4::+A+A+CD##(8(8(>(>@P@P@X@X'YYY11t{{7J{{33K@@r,   c                     | j                   1t        | j                         dk7  r|j                  | j                   vry|j                  | j                  vry| j
                  |j                  | j
                  v ryy)Nr   FT)rk   r<   rT   op_typerm   rl   )r'   nodes     r*   should_quantize_nodez"BaseQuantizer.should_quantize_node   sn    "".D**+q0		!7!77<<t888  ,d>S>S1Sr,   c                 4   | j                   j                   j                  D cg c]   }|j                  r|j                  dk(  s|" }}t        |      dk7  rt	        d      |d   j
                  }|dk(  rt        j                  d| d       y|dk  rt        j                  d| d       | j                   j                   j                  j                  |d          | j                   j                   j                  j                  t        j                  j                  d	d
      g       d
}|dk  r| j                  t        j                  j                  k(  rt        j                  d| d       | j                   j                   j                  j                  |d          | j                   j                   j                  j                  t        j                  j                  d	d      g       d| j                   j                   _        d}|S c c}w )Nzai.onnxr   z$Failed to find proper ai.onnx domainr   
   z$The original model opset version is ze, which does not support node fusions. Please update the model to opset >= 11 for better performance.z, which does not support quantization. Please update the model to opset >= 11. Updating the model automatically to opset 11. Please verify the quantized model.       z, which does not support quantization to float 8. Please update the model to opset >= 19. Updating the model automatically to opset 19. Please verify the quantized model.	   )rY   opset_importdomainr<   r&   versionloggingwarningremoveextendr   helpermake_opsetidre   r   r   
ir_version)r'   opsetai_onnx_domainro   s       r*   rn   z!BaseQuantizer.check_opset_version   s   #zz//<<
<eELLTYT`T`dmTmE< 	 
 ~!#CDD&q)11BOO6}o  Fk  l 2OO6}o  Fe  f JJ))001BCJJ))00$++2J2J2r2R1STM2$"3"3t7G7G7T7T"TOO6}o F5 5
 JJ))001BCJJ))00$++2J2J2r2R1ST*+DJJ'MA
s    HHc                 
   t        || j                  j                               }t        |      }|t        z   }| j
                  t        j                  j                  k(  r0t        j                  |      }|j                  t        j                  k(  rt        j                  j                  }	nQ|j                  t        j                  k(  rt        j                  j                  }	nt!        d|j                   d      |j#                  t        j                        }
t        j$                  dg|
j                        }|j'                  d      }t        j(                  j+                  |
|      }| j                  j-                  |g       d}n||z  |z  }t        j                  |t        j.                        t        j                  |t        j.                        z  }
|
j1                         }
t        j.                  t        j2                  t        j4                        j6                        }t        j.                  t        j2                  t        j4                        j8                        }t        j:                  |
|k        st        j:                  |
|kD        rt=        j>                  d| d       t        j@                  |
||      j#                  t        j4                        }
t        j                  |
t        j4                        j'                  |jB                        }t        j(                  j+                  ||      }| j                  j-                  |g       t        j                  ||j                        j'                  d      }d	}| j
                  }	|d
z   }t        j(                  j+                  ||      }| j                  j-                  |g       | j
                  t        j                  j                  k(  r| j
                  }nt        j                  jD                  }|dz   }| j
                  t        j                  j                  k(  r/t        jF                  jI                  || j
                  dgdg      }n|jJ                  dkD  r_t        jL                  |jN                  t        j4                        j'                  d      }t        j(                  j+                  ||      }n#t        jF                  jI                  ||g dg      }| j                  j-                  |g       ||||||	fS )z]
        Quantized the bias. Zero Point == 0 and Scale == Input_Scale * Weight_Scale
        zEOnly float16 or float32 are supported with float 8 but bias dtype is r   r   r#   CastzQuantized bias `z<` exceeds the range of a int32. The bias scale is too small.DequantizeLinear_scale_zero_point        r   )(r   rY   rq   r   r
   re   r   r   r   r!   asarrayr#   r%   r   r$   r   r   astypearrayreshapenumpy_helper
from_arrayinitializer_extendfloat64roundiinfoint32minmaxrf   r   r   clipdimsINT32r   make_tensorsizezerosshape)r'   	bias_nameinput_scaleweight_scalebetabias_initializer	bias_dataquantized_bias_namer   
node_qtypequantized_data
bias_scalebias_scale_datapacked_bias_initializer	node_type	int32_min	int32_maxbias_np_dataquantized_bias_scale_namepacked_bias_scale_initializerrJ   quantized_bias_zp_namepacked_bias_zp_initializerbias_zp_datas                           r*   quantize_bias_static_implz'BaseQuantizer.quantize_bias_static_impl   s    (	4::3I3I3KL)*:;	'*BB  0 0 = ==::i(DzzRZZ'!--55
rzz)!--33
"ghlhrhrgsst uvv![[4N1#^-A-ABJ(004O&*&7&7&B&B>Sf&g#JJ))+B*CDI %|3d:J  ZZ	DrzzR\dfdndnGooN+113N 

288BHH#5#9#9:I

288BHH#5#9#9:Ivvny01RVVNY<V5W&yk1mn  WW^Y	JQQRTRZRZ[N ::nBHHEMMN^NcNcdL&*&7&7&B&B<Qd&e#JJ))+B*CD !jj9??KSSTVWO*I**J %8($B!(,(9(9(D(D_Vo(p%

%%'D&EF  0 0 = ==++K**00K!4}!D 0 0 = ==)-)@)@AWY]YjYjmnloruqv)w&__q 88J$4$4BHHEMMbQL)-):):)E)ElTj)k&)-)@)@AWYdfhkljm)n&

%%'A&BC  %"
 	
r,   c                 4   |j                   t        z   }|j                   dz   }|j                   dz   }t        |      }| j                  j	                  |j                   i       }	d|	v r|	d   j
                  }d|	v rd|	v rt        j                  |	d   t        |         }
t        j                  |	d         }t        ||j                         ||
      }t        |
t        j                        sJ dt        |
              |
j                  t        j                  k7  r|
j                  t        j                   k7  sJ d	|
j                          t        |t        j                        saJ dt        |              || j"                  k(  r| j%                  |      n| j&                  }t)        |j                         ||	j+                  d
|      |	j+                  d| j,                  xr |      | j.                  |	j+                  d      |	j+                  d            \  }
}}t        |
t        j                        sJ dt        |
              |
j                  t        j                  k7  r|
j                  t        j                   k7  sJ d	|
j                          t        |t        j                        sJ dt        |              |j0                  }t2        j4                  j7                  ||g |j9                  d      j;                               }t2        j4                  j7                  ||g |
j9                  d      j;                               }| j<                  j?                  ||g       |s| j"                  t2        j@                  jB                  k(  r,t3        j@                         }| j"                  |_        |jD                  jG                  |jD                         ||_         |j                         jI                         jK                         |_&        tN        tO        |      }|jP                  |jP                  k7  s"|jK                         |jK                         k7  rtS        d|jP                   d|jK                         dd  d|jK                         dd  d|jP                   dtU        |      dd  d      |t2        j@                  jV                  t2        j@                  jX                  fv r|j                  t        jZ                  t        j\                  fvrtS        d| d      t_        ta        |jK                                     }t2        j4                  j7                  |||jD                  |d      }nmt        jb                  |t2        j4                  je                  |            j9                  |jD                        }t2        jf                  ji                  ||      }| j<                  j?                  |g       |||fS )a  
        :param weight: TensorProto initializer
        :param qType: type to quantize to
        :param keep_float_weight: Whether to quantize the weight. In some cases, we only want to qunatize scale and zero point.
                                  If keep_float_weight is False, quantize the weight, or don't quantize the weight.
        :return: quantized weight name, zero point name, scale name
        r   r   default_val
quant_typer   
zero_pointr   Unexpected type Unexpected dtype 	symmetricr[   rminrmaxr[   rb   rmin_overridermax_override)r   NzThe initializer of shape z! could not be created, expecting r   z, got z and shape=z
raw=   r   Quantized weights for . must be 8-bit before packing as 4-bit values.Traw)5rT   r
   r   rp   get_per_tensor_overridesrJ   r!   r   r	   r   flattenr   r"   r   r#   r$   r%   re   r   ra   r   r/   r[   rb   r   r   r   r   r   tolistrY   r   r   r   r   r   copytobytesraw_datar   r   RuntimeErrorr   r   UINT4int8uint8bytesr   r   tensor_dtype_to_np_dtyper   r   )r'   r   qTyper[   keep_float_weightq_weight_namezp_name
scale_nameweight_dataquant_overridesr   r   q_weight_datar   scale_dtypescale_initializerzero_initializerq_weight_initializercheckpacked_datas                       r*   quantize_initializer_implz'BaseQuantizer.quantize_initializer_impl1  sJ    &>>++-[[8+
 ,F355NNv{{hjNk?*#L1==Eo%,/*I/,"?G[\aGbcJHH_W56E,UK4G4G4I5R\]Mj"**5\9I$zJZI[7\\5  BJJ.:3C3Crzz3Q6":#3#3"456QeRZZ0R4DT%[M2RR0 <ADDUDU;U007[_[w[wI/<##%##K;,00ARARAcWcd#22-11&9-11&90,J} j"**5\9I$zJZI[7\\5  BJJ.:3C3Crzz3Q6":#3#3"456QeRZZ0R4DT%[M2RR0&& KK33JRQVQ^Q^_dQeQlQlQno;;227E2zGYGYZ_G`GgGgGij

%%'8:J&KL   D$4$4$A$AA'+'7'7'9$151B1B$.$))00=,9$)0=0E0E0G0L0L0N0V0V0X$-$0 ..BCE{{k&7&775==?mNcNcNe;e*78I8I7JJk,446s;<F5==?SVTVCWBXXcdjdpdpcq$S)=%>t%D$EQH 
 4++00$2B2B2H2HII &&rww.AA&0?mn  $$6}7L7L7N$OP (,{{'>'>}eU[U`U`bmsw'>'x$ "

=@d@dej@k l t tKK! (,'8'8'C'CMS`'a$JJ))+?*@Agz11r,   c                 t   t        || j                  j                               }|t        d|      t	        |      }t        |j                        }t        ||      \  }	}
|	st        d| d| d|       |
}|j                  |   }| j                  j                  |d|ig      }t        |      }|dk7  r||k7  rt        d| d	| d
      t        |d   d   |      \  }}|r||k7  rt        d| d| d|d   d    d      d|d   v r|d   d   j                  }|d   j                  d| j                  |            }|d   j                  d| j                  xr |      }g }g }g }t        |j                        }t        |      }d||<   t        |      D ]  }|j!                  ||      }||k  r|nd}||   }d|v r0d|v r+t#        j$                  |d   t&        |         }t#        j$                  |d         }t)        ||j+                         ||      }t-        |t"        j.                        sJ dt1        |              |j2                  t"        j4                  k7  r|j2                  t"        j6                  k7  sJ d|j2                          t-        |t"        j.                        sJ dt1        |              t-        |t"        j.                        s4J dt1        |              t9        |j+                         |||| j:                  |j                  d      |j                  d            \  }}}t-        |t"        j.                        sJ dt1        |              |j2                  t"        j4                  k7  r|j2                  t"        j6                  k7  sJ d|j2                          t-        |t"        j.                        sJ dt1        |              t-        |t"        j.                        sJ dt1        |              |j=                  |       |j=                  |       |j=                  t#        j>                  |      jA                  |              t#        jB                  ||      }|tD        z   }|dz   }|dz   } |jF                  |   g}!tH        jJ                  jM                  | |jN                  |!t#        jP                  |      jS                               }"tH        jJ                  jM                  |||!t#        jP                  |      jS                               }#| j                  jU                  |"|#g       |s]|tH        jV                  jX                  tH        jV                  jZ                  fv r|j2                  t"        j\                  t"        j^                  fvrta        d| d      tc        te        |jg                                     }$tH        jJ                  jM                  ||||$d       }%| j                  jU                  |%g       nt#        j>                  |tH        jJ                  ji                  |            jA                  |jF                        }tH        jj                  jm                  ||      }%| j                  jU                  |%g       ||| fS )!Nz{} is not an initializerzWeight z# has a per-channel axis with value z  that is out-of-bounds for rank r   r   r   z.Per-channel tensor quantization overrides for z must have either 1 or z& elements in the list of dictionaries.r   z"Tensor quantization overrides for z& specify an unexpected axis. Expected z
, but got r   r   r   r[   r   r   r   r   r   r   r   r   r   r   r   r   Tr   )7r   rY   rq   r&   r   r<   r   r   rp   get_per_channel_overridesrJ   r/   r   r[   listrangetaker!   r   r	   r   r   r   r"   r   r#   r$   r%   r   rb   appendr   r   concatenater
   r   r   r   r   r   hstackr   r   r   r   r   r   r   r   r   r   r   r   r   r   )&r'   r   re   channel_axisr[   r   rq   weightsweights_rankis_axis_valid	axis_normchannel_countquant_overrides_for_channelsnum_channel_overridesis_axis_override_validaxis_overrider   zero_point_list
scale_listquantized_per_channel_data_listweights_shapereshape_dimsiper_channel_datachannel_override_indexchannel_quant_overridesr   r   quantized_per_channel_dataquantized_weightsr   r   r   zero_scale_shaper  r  r	  r  s&                                         r*    quantize_weight_per_channel_implz.BaseQuantizer.quantize_weight_per_channel_impl  s    #;

0F0F0HI7EE'47==)#1,#M y+&I, X**69 
 !l3'+'B'B'\'\v|&<%= (] (
$ !$$@ A A%*?=*P@ N,o-SU 
 1??[\]?^_e?fht0u-%,)F4[M B(>4PQR4STZ4[3\\]_  7::7:<HTTL0377TE]E]^jEkl	3A6::>4K\K\Kmamn
*,'W]]+M*%&\"}%A&||A|<*+.C*CQ"&BCY&Z#11lF]6]XX&=l&KSghtSuv
!8!AB-= "2":":"<eZ.* "*bjj9`=MdS]N^M_;``9$$

2z7G7G2::7U:&z'7'7&89:U!%4V8He6VV4!.

 I%d+E&F%GHI 
 AN$,,. !-#'#6#6"9"="=f"E"9"="=f"EA=
E#= "*bjj9`=MdS]N^M_;``9$$

2z7G7G2::7U:&z'7'7&89:U!%4V8He6VV4!.

 I%d+E&F%GHI  "":.e$+222::>X3Y3a3abn3opU &Z NN+JLY#&>>- 8+
 (,,\:; KK33--/?:AVA]A]A_
  ;;22\#3RYY5O5V5V5X
 	

%%'8:J&KL  0 0 5 5t7G7G7M7MNN$**277BHH2EE&0?mn  $$67H7P7P7R$ST (,{{'>'>!<QU (? ($ 

--/C.DE$&JJ%++>>|L% '+**+ " (,'8'8'C'CDUWd'e$

--/C.DEgz11r,   c                 B   | j                   y | j                  j                         D ]t  }|j                  dv r| j	                  |      s$t        | j                  j                         |j                  d            dk7  r[|j                  d   | j                   vs|j                  d   | j                   vr| j                   |j                  d      }t        |t              s(t        dt        |       d|j                  d   d      || j                   |j                  d   <   |j                  dk(  s| j	                  |      s(t        t        j                  d      t        j                  d	      
      | j                   |j                  d   <   w y )N)ClipRelur   r   r   z for r   Softmaxr         ?)lowesthighest)rj   rY   nodesr   r   r<   input_name_to_nodesrX   rW   r   r   r   r   r!   r$   )r'   r   tds      r*   adjust_tensor_rangesz"BaseQuantizer.adjust_tensor_ranges  sU   %JJ$$&D||//006tzz557

1FG1L::a=(:(::dkk!nTXTfTf>f''A7!"j1#&6tBxjdkkRSnEWWX$YZZ46""4::a=1*0065?rzzRU`b`j`jkn`o5p""4;;q>2# 'r,   r.   )r.  )FF)TF)r>   r?   r@   r+   r   r   DataTypeboolr   r   r   r   r   r   rn   r   r
  r)  r4  rA   r,   r*   rC   rC   B   sr     IZV
T5E5E5N5N 
SW 
"' A !FR
hY2@ L2\qr,   rC   )r   typingr   r   numpyr!   r   onnx.numpy_helperonnx.reference.op_runr   ImportError	calibrater   
onnx_modelr   quant_utilsr	   r
   r   r   r   r   r   r   r   r   rp   r   r   rC   rA   r,   r*   <module>r?     sj        7
 " !   ? <mq mqg  s   A A)(A)