
    g
                     z    d dl Z d dlZd dlZd dlZd dlmZ  e j                  e      Zd Z	d Z
d Z G d d      Zy)    N)Conv1Dc                 <   | j                   j                  \  }}t        j                  j	                  ||      }| j                   j
                  j                  j                         |j                   _        | j                  j
                  |j                  _        |S )N)	weightshapetorchnnLineardataT
contiguousbias)modulein_sizeout_sizelinears       ]/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/quantize_helper.py_conv1d_to_linearr      sj    ++GXXX__Wh/F++--88:FMM{{''FKKM    c                     t         j                  d       t        | j                        D ]G  }| j                  |   }t	        |t
              rt        |      }|| j                  |<   =t        |       I y)zsin-place
    This is for Dynamic Quantization, as Conv1D is not recognized by PyTorch, convert it to nn.Linear
    zreplace Conv1D with LinearN)loggerdebuglist_modules
isinstancer   r   conv1d_to_linear)modelnamer   r   s       r   r   r      s\     LL-.U^^$%ff%&v.F#)ENN4 V$ %r   c                     t        j                  | j                         d       t        j                  j                  d      dz  }t        j                  d       |S )Nztemp.p   )r   save
state_dictospathgetsizeremove)r   sizes     r   _get_size_of_pytorch_modelr'   '   sA    	JJu!8,77??8$4DIIhKr   c                   F    e Zd Zeej
                  fd       Zedd       Zy)QuantizeHelperc                    t        |        t        j                  j                  | t        j                  j
                  h|      }t        j                  dt        |               t        j                  dt        |              |S )z{
        Usage: model = quantize_model(model)

        TODO: mix of in-place and return, but results are different
        )dtypez'Size of full precision Torch model(MB):z"Size of quantized Torch model(MB):)	r   r   quantizationquantize_dynamicr   r	   r   infor'   )r   r+   quantized_models      r   quantize_torch_modelz#QuantizeHelper.quantize_torch_model/   ss     	,,==eehhooEV^c=d=>XY^>_=`ab89STc9d8efgr   c                    ddl m} ddlm}  ||      j                  j                  dd       t        j                  dt        j                  j                  |       dz           || ||dt        j                  j                  i	       t        j                  d
|        t        j                  dt        j                  j                  |      dz          y )Nr   )Path)r-   T)parentsexist_okz&Size of full precision ONNX model(MB):r   DefaultTensorType)use_external_data_formatextra_optionszquantized model saved to:z!Size of quantized ONNX model(MB):)pathlibr2   onnxruntime.quantizationr-   parentmkdirr   r.   r"   r#   r$   onnxTensorProtoFLOAT)onnx_model_pathquantized_model_pathr6   r2   r-   s        r   quantize_onnx_modelz"QuantizeHelper.quantize_onnx_model<   s     =!"))//t/L<RWW___=]_h=i<jkl %=.0@0@0F0FG		
 	/0D/EFG7H\8]_h8i7jklr   N)F)__name__
__module____qualname__staticmethodr   qint8r0   rA    r   r   r)   r)   .   s1    */++ 
 
 m mr   r)   )loggingr"   r<   r   transformers.modeling_utilsr   	getLoggerrB   r   r   r   r'   r)   rG   r   r   <module>rK      sC     	   .			8	$%m mr   