
    g
                     L    d dl Z d dl mZ ddlmZmZ ddlmZ 	  G d de      Zy)	    N)onnx_pb   )attribute_to_kwarg	ms_domain   )QuantOperatorBasec                   .     e Zd Z fdZd Z fdZ xZS )AttentionQuantc                 &    t         |   ||       y N)super__init__)selfonnx_quantizer	onnx_node	__class__s      a/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/operators/attention.pyr   zAttentionQuant.__init__   s    3    c                 L    | j                   j                  | j                        S r   )	quantizershould_quantize_nodenode)r   s    r   should_quantizezAttentionQuant.should_quantize   s    ~~22499==r   c                    | j                   }|j                  dk(  sJ |j                  D ]!  }|j                  dk(  st        |          c S  | j                  j                  |dg      \  }}}}| j                  j                  |dgdd      \  }}}	}
|j                  |       |j                  |       |j                  |	       |j                  |
       |t        |          S |j                  sdn|j                  d	z   }g }|j                  |       |j                  |j                  d
   g       |j                  |       |j                  t        |j                        dkD  r|j                  d   ndg       |j                  |       |j                  t        |j                        dkD  r|j                  d   ndg       i }|j                  D ]  }|j                  t        |              t        |d<   t        j                   j"                  d||j$                  |fi |}|j'                  |       | j                  xj(                  |z  c_        y)z
        parameter node: Attention node.
        parameter new_nodes_list: List of new nodes created before processing this node.
        return: a list of nodes in topological order that represents quantized Attention node.
        	Attentionqkv_hidden_sizesr   r   T)reduce_rangeop_level_per_channelN _quantr         domain
QAttention)r   op_type	attributenamer   quantizer   quantize_activationquantize_weightextendinputlenupdater   r   onnxhelper	make_nodeoutputappend	new_nodes)r   r   attrquantized_input_nameszero_point_namesscale_namesnodesquantized_input_names_weightzero_point_names_weightscale_names_weightnodes_weightqattention_nameinputskwargsr&   qattention_noder   s                   r   r(   zAttentionQuant.quantize   s    yy||{***
 NNDyy..w')) # NN..taS9	
! NN**4!4^b*c	
(#$$%AB 78-.\" (7#%%$(II"499x3G+,tzz!}o&k"DJJ!(;tzz!}DE&'DJJ!(;tzz!}DEIMM,Y78 ($x++//fdkkSbmflm_%  E) r   )__name__
__module____qualname__r   r   r(   __classcell__)r   s   @r   r
   r
      s    4>6* 6*r   r
   )	r/   r   
onnx_protoquant_utilsr   r   base_operatorr   r
    r   r   <module>rJ      s#     & 7 ,
=*& =*r   