
    gd#                        d dl Z d dlZd dlZd dlmZmZ d dlZd dlmZ	 d dl
Z
d dlmZmZmZmZ d dlmZ ddlmZ ddlmZ  ej,                  e      Z G d d	      Zd
 Zedk(  r e       Zej8                  rej;                  ej<                         ej>                  Z ejB                  Z"ejF                  jI                  e"      r!ejK                  de" d        e&de" d       e
jN                  e       Z( ee(ejR                  ejT                  ejV                        Z,e,j[                          e,jP                  j]                  e"d       yy)    N)ListTuple)
GraphProto
ModelProto	NodeProtoTensorProto)quantize_matmul_bnb4   )	ONNXModel)attribute_to_kwargc                       e Zd ZdZdZdZddededefdZe	d	e
e   d
eeef   fd       Zdej                   d
ej$                  fdZdede
e   d
efdZde
e   fdZd Zy)MatMulBnb4QuantizerzMPerform 4b quantization of constant MatMul weights using FP4 or NF4 data typer   r
   Nmodel
quant_type
block_sizec                     |xs g }|t         j                  t         j                  fv sJ t        |      | _        || _        || _        t        |      | _        y N)	r   FP4NF4r   r   r   r   setnodes_to_exclude)selfr   r   r   r   s        c/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/matmul_bnb4_quantizer.py__init__zMatMulBnb4Quantizer.__init__&   sV    +1r1557J7N7NOOOOu%
$$ #$4 5    
graph_pathreturnc                     t        t        |      dz
  dd      D ]/  }||   }|j                  D ]  }|j                  | k(  s||fc c S  1 y)Nr
   )NN)rangeleninitializername)r#   r   gidgraphtensors        r   __get_initializerz%MatMulBnb4Quantizer.__get_initializer.   sR    Z1,b"5CsOE++;;$&!5=( , 6
 r   fpweightc           	         t        |j                        dk7  rt        d      |j                         j	                         }|j                  \  }}||z  }| j
                  }||z   dz
  |z  }|dz   dz  }t        j                  |d      }	t        j                  ||j                        }
t        |	||
|| j                  ||       |	|
fS )z4b quantize fp32/fp16 weight   z9Current bnb4 block quantization only supports 2D tensors!r
   uint8)dtype)r!   shape
ValueError	transposecopyr   npzerosr,   r	   r   )r   r(   
fpweight_trowscolsnumelr   
num_blocksquantized_numelpackedabsmaxs              r   bnb4_block_quantz$MatMulBnb4Quantizer.bnb4_block_quant7   s     x~~!#XYY '')..0
^^
dt__
j(1,;
 19*/9*HNN;VZT__VZ\`ar   nodegraph_stackc                 J   |j                   dk7  r|S t        j                  d|j                   d       |j                  | j                  v r%t        j                  d|j                   d       |S |j
                  d   }t        j                  ||      \  }}|t        j                  d       |S t        j                  j                  |      }t        |j                        dk7  rt        j                  d	       |S | j                  |      \  }}t        j                  j                  |      }	|j                  d
z   |	_        |j
                  D ].  }
|
j                  |k(  s|j
                  j                  |
        n t        j                  j                  |      }|j                  dz   |_        |j                   j#                  |	|g       i }|j                  \  }}||d<   ||d<   | j$                  |d<   | j&                  |d<   t        j(                  j*                  	 d|j
                  d   |	j                  |j                  g|j,                  d   g|j                  r|j                  d
z   nddd|}t        j                  d|j                   d       |S )zdIf the node is MatMul with fp32 const weight, quantize the weight with int4, and return the new nodeMatMulzstart to quantize z ...zexclude to quantize z$ as specified by nodes_to_exclude...r
   z2MatMul doesn't have const weight. Skip to quantizer*   z)MatMul weight is not 2D. Skip to quantize_Bnb4_absmaxKNr   r   r    com.microsoft)inputsoutputsr#   domainzcomplete quantization of )
MatMulBnb4)op_typeloggerdebugr#   r   inputr   %_MatMulBnb4Quantizer__get_initializeronnxnumpy_helperto_arrayr!   r-   r;   
from_arrayremover"   extendr   r   helper	make_nodeoutput)r   r<   r=   inputBBBs_graphB_arrayr9   r:   B_quantrM   absmax_tensorkwargsr4   r5   matmul_bnb4_nodes                   r   _bnb4_matmul_node_weightz,MatMulBnb4Quantizer._bnb4_matmul_node_weightM   sI    <<8#K)$))D9:99---LL/		{:^_`KA);;FKP89LLMNK##,,Q/w}}"LLDEK..w7##..v6vv'^^EzzV#%%e, $
 ))44V<VVi/##Wm$<=]]
dss#|#|;;00
JJqM7<<1C1CD[[^$(,		W$r"
 
 	04@Ar   c                    g }|d   }|j                   D ]  }|j                  D cg c]R  }|j                  t        j                  j
                  k(  s'|j                  t        j                  j                  k(  r|T }}t        |      rVi }|j                  D ]  }|j                  t        j                  j
                  k(  r9|j                  |j                         |j                  | j                  |      i}n|j                  t        j                  j                  k(  rTg }	|j                  D ]4  }
|j                  |
       |	j                  | j                  |      g       6 |j                  |	i}nt        |      }|j                  |        t        j                   j"                  |j$                  |j&                  |j(                  fd|j                  i|}|j                  | j+                  ||              |j-                  d       |j                   j                  |       |j/                          |S c c}w )Nr   r#   r<   )r<   	attributetyperO   AttributeProtoGRAPHGRAPHSr!   appendgr#   _process_subgraphgraphsrT   r   updaterU   rV   rJ   rM   rW   r`   
ClearFieldpop)r   r=   	new_nodesr%   r<   attrgraph_attrsr^   kvvaluesubgraphs              r   ri   z%MatMulBnb4Quantizer._process_subgraph   s   	BJJD !NN*D99 3 3 9 99TYY$J]J]JdJd=d *  
 ; NNDyyD$7$7$=$==#**4662"ii)?)?)LMd&9&9&@&@@ "(,H'..x8!LL$*@*@*M)NO )4 #ii//5MM"% + {{,,LL$**dkk@D		MS T::4MN7 : 	 

)$?s   AIc                 ^   | j                   j                         g}| j                   j                         }d}|D ]  }|j                  dk(  sd} |s0|j	                  t
        j                  j                  dd      g       | j                  |       | j                   j                          y )NFrE   Tr
   )
r   r%   opset_importrH   rT   rO   rU   make_opsetidri   clean_initializers)r   r=   ru   has_ms_domainopsets        r   processzMatMulBnb4Quantizer.process   s    zz'')*zz..0!E||. $ " !9!9/1!M NO{+

%%'r   r   )__name__
__module____qualname____doc__r   r   r   intr   staticmethodr   r   r   r   rN   npt	ArrayLiker1   ndarrayr;   r   r`   ri   rz    r   r   r   r      s    W C C6j 6c 6s 6 D,< {T^G^A_     2::  ,5 Y 5 T*EU 5 Zc 5 n$T*-= $L(r   r   c                     t        j                  d      } | j                  ddd       | j                  ddd       | j                  d	d
dt        j                  t        j
                  gd       | j                  dd
dd       | j                  ddd
d       | j                  d
       | j                  ddt        d
g d       | j                         S )Na  Blockwise FP4/NF4 quantization for MatMul 2D weight matrices.

A weight matrix is partitioned into blocks, where each block is a contiguous
subset inside the flattened transposed weight matrix. Each block is quantized
into a set of 4b integers with an absolute value scaling factor.
)descriptionz--input_modelTzPath to the input model file)requiredhelpz--output_modelzPath to the output model filez--quant_typeFr
   z&Quantization data type. 0: FP4, 1: NF4)r   defaultchoicesr   z--block_size@   zVBlock size for blockwise quantization. Note: bnb.nn.Linear4bit only uses block_size=64)r   r   r   z-vz	--verbose
store_true)r   action)verbosez--nodes_to_exclude+zBSpecify the nodes to be excluded from quantization with node names)nargsrc   r   r   r   )	argparseArgumentParseradd_argumentr   r   r   set_defaultsstr
parse_args)parsers    r   r   r      s    $$F $=[\
(4>]^
$((*=*A*AB5   e	   kE,O
&
Q   r   __main__zfile z already exists)r   T)/r   loggingostypingr   r   numpyr1   numpy.typingr   rO   onnx.onnx_pbr   r   r   r   onnxruntime.capi._pybind_stater	   
onnx_modelr   quant_utilsr   	getLoggerr{   rK   r   r   argsr   setLevelDEBUGinput_modelinput_model_pathoutput_modeloutput_model_pathpathexistserror	Exceptionloadr   r   r   r   quantrz   save_model_to_filer   r   r   <module>r      s!     	     G G ? ! +			8	$^( ^(B$N z<D||&''))	ww~~'(u./?@% 12/BCCDII&'EtZ^ZoZopE	MMO	KK""#4d; r   