
    g                          d dl Z d dlmZ d dlmZmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZ  e j                  e      Z G d d	e      Z G d
 de      Zy)    N)Union)AttentionMaskFusionAttention)NumpyHelper)	NodeProtohelper)	OnnxModel)BertOnnxModelc                   p     e Zd ZdZdedededef fdZdede	d	e	deded
ededede
e	df   fdZd Z xZS )FusionTnlrAttentionz
    Fuse TNLR Attention subgraph into one Attention node.
    TNLR Attention has extra addition after qk nodes and adopts [S, B, NH] as I/O shape.
    modelhidden_size	num_headsattention_maskc                 *    t         |   ||||       y N)super__init__)selfr   r   r   r   	__class__s        ]/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_tnlr.pyr   zFusionTnlrAttention.__init__   s     	YG    
mask_indexmatmuladdinputoutput
add_qk_strreturnNc	                    |dkD  sJ |dkD  r$||z  dk7  rt         j                  d| d|        y | j                  j                  |j                  d         }	| j                  j                  |j                  d         xs( | j                  j                  |j                  d         }
|	|
y t        j                  |	      }t        j                  |
      }| j                  j                  d      }|	j                  }t        j                  |      }t        j                  |dz   ||d|z  g|j                  |      j                         d	      }	| j                  j                  |	| j                         t        j                  |d
z   |d|z  g|j                  |      j                         d	      }
| j                  j                  |
| j                         ||dz   |d
z   g}||j!                  |       n|j!                  d       |"|j!                  d       |j!                  |       t        j"                  d||g|      }d|_        |j&                  j)                  t        j*                  d|      g       |S )Nr   zinput hidden size z# is not a multiple of num of heads    	Attention_qkv_weight   T)name	data_typedimsvalsraw	_qkv_bias )inputsoutputsr%   zcom.microsoftr   )loggerdebugr   get_initializerr   r   to_arraycreate_node_namer&   r   tensor_dtype_to_np_dtypemake_tensorastypetobytesadd_initializerthis_graph_nameappend	make_nodedomain	attributeextendmake_attribute)r   r   r   r   r   r   r   r   r   weightbias
qkv_weightqkv_biasattention_node_nametensor_dtypenp_typeattention_inputsattention_nodes                     r   create_attention_nodez)FusionTnlrAttention.create_attention_node    sc    1}}?i 7A=LL-k]:]^g]hij++FLLO<zz))#))A,7c4::;U;UVYV_V_`aVb;c>T\ ))&1
''-"jj99+F''11,?##$}4"q;/""7+335
 	

""64+?+?@!!${2"k/")113
 	

""4)=)=> -/+-

 !##J/##B'!##B'##J/))#H$	
 !0  '')>)>{I)V(WXr   c                    |}|j                   dk7  ry | j                  j                  |g dg d      }|
|\  }}}}}	}
ny g }t        |j                        D ]1  \  }}||vr||d   j
                  d   k(  r!|j                  |       3 t        |      dk7  ry |d   }| j                  j                  |
g dg d      }|y |\  }}}}}| j                  j                  |dgdg      }|d   }| j                  j                  |
g d	g d
      }|y |\  }}}| j                  j                  |g dg d      }|y |d   }|d   }| j                  j                  |g dg d      }|y |d   }|d   }| j                  j                  |ddgddg      }|y |j                  d   |k(  rd }|}| j                  |||| j                  | j                  ||j
                  d   |d   j                  d         }|y | j                  j                  |       | j                  | j                  |j                  <   t        j                   dd|j                  z   g|j
                  d   gd|j                  z   g d      }| j                  j#                  || j                         |j                  d   |j                  d<   d|j                  z   |j
                  d<   | j$                  j'                  ||	|
g       | j$                  j'                  |       | j$                  j'                  |       | j$                  j'                  |       | j$                  j'                  |       d| _        y y )NSkipLayerNormalization)WhereAddMatMulReshape	TransposerM   )r!   r!   r!   r   r   r   r   r!   )rO   rN   SlicerL   rM   )r!   r   r   r   r!   rO   )SoftmaxrL   rM   )r   r   r   )MulrO   rN   rP   rL   rM   )r   r   r   r   r   r!   rN   rK   back_transpose_in_back_transpose_)r!   r      )permT)op_typer   match_parent_path	enumerater   r   r9   lenrH   r   r   nodes_to_addr8   node_name_to_graph_namer%   r   r:   add_nodenodes_to_remover=   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_node
start_node	qkv_nodes_matmul_belowreshape_qkvtranspose_qkv
matmul_qkvother_inputs_ir   
root_inputv_nodesr   r   upper_nodes	transposeqk_nodesadd_qk	matmul_qkq_nodesk_nodesrelative_position_bias_nodesr   attention_last_nodenew_nodeback_transposes                                 r   fusezFusionTnlrAttention.fuseh   s    $
!!%== JJ00H
	
  KTHQ<mZ":#3#34IB//	!++A..& 5 |!!!_
**..>

 ?!(Aq#vjj226K=1#NN	:://
<XZcd!)FI**..E

 ?bk**..>

 ?bk'+zz'C'CFYX_L`cdfgbh'i$'/<<?j(J"- 11  #**1-,Q/55a8	H $$X.:>:N:ND((7 $--%56#$!HMM1N JJ0D0DE ) 2HNN1!5!EHOOA  '')<mZ(XY  ''1  ''0  ''0  ''0  $DS )r   )__name__
__module____qualname____doc__r	   intr   r   strr   r   rH   r{   __classcell__r   s   @r   r   r      s    
HH H 	H
 &HFF F 	F
 F F F F F 
y$	FPq$r   r   c                   $     e Zd Z fdZd Z xZS )TnlrOnnxModelc                     t         |   |||       t        |       | _        t	        | | j
                  | j                  | j                        | _        y r   )r   r   r   r   r   r   r   attention_fusion)r   r   r   r   r   s       r   r   zTnlrOnnxModel.__init__   sE    	;7+D1 3D$:J:JDNN\`\o\o pr   c                 8    | j                   j                          y r   )r   apply)r   s    r   fuse_attentionzTnlrOnnxModel.fuse_attention   s    ##%r   )r|   r}   r~   r   r   r   r   s   @r   r   r      s    q
&r   r   )loggingtypingr   fusion_attentionr   r   fusion_utilsr   onnxr   r   
onnx_modelr	   onnx_model_bertr
   	getLoggerr|   r.   r   r    r   r   <module>r      sI   
   ; $ "   )			8	$H$/ H$V&M &r   