
    g                         d dl mZ d dlmZmZmZmZ d dlZd dl	m
Z
 d dlmZ d dlmZmZ d dlmZmZmZmZ d dlmZ  ee      Z G d	 d
      Z G d de
      Zy)    )	getLogger)ListOptionalTupleUnionN)Fusion)AttentionMaskFormat)FusionUtilsNumpyHelper)	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                   D    e Zd ZdZdefdZdefdZd Zd Z	de
d	e
fd
Zy)AttentionMask:
    Fuse Attention subgraph into one Attention node.
    modelc                     || _         i | _        i | _        t        |      | _        t
        j                  | _        |j                         | _	        y N)
r   mask_indicemask_castedr
   utilsr	   MaskIndexEndmask_formatget_opset_versionopset_version)selfr   s     ^/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_attention.py__init__zAttentionMask.__init__   sD    
 '
.;;"446    r   c                     || _         y r   )r   )r   r   s     r   set_mask_formatzAttentionMask.set_mask_format!   s
    &r!   c                 f    || j                   v r|| j                   |   k(  sJ || j                   |<   y r   )r   )r   mask
mask_indexs      r   set_mask_indicezAttentionMask.set_mask_indice$   s9    4###!1!1$!7777!+r!   c                 r    t        | j                        dkD  sJ t        t        | j                              S Nr   )lenr   nextiter)r   s    r   get_first_maskzAttentionMask.get_first_mask)   s1    4##$q(((D))*++r!   inputreturnc           	         | j                   t        j                  k(  ry || j                  v r| j                  |   S | j                  j                  |      r| j                  j                  |      \  }}n | j                  j                  |      \  }}d}|r|| j                  |<   | j                   t        j                  k(  r|| j                  |<   |S | j                  j                  d      }| j                  dk  r|t        j                  d|g|g| j                  j                  dd            }|j                  j!                  t        j"                  ddg      t        j"                  d	d
      g       nd}| j                  j%                  |      C| j                  j'                  t        j(                  |t*        j,                  dgdgd             t        j                  d||g|g| j                  j                  dd            }|j                  j!                  t        j"                  d	d
      g       | j                  j/                  |       || j                  |<   |S )NTr&      	ReduceSumMaskReduceSuminputsoutputsnameaxes   keepdimsr   ort_const_1_reduce_sum_axesFr7   	data_typedimsvalsraw)r   r	   NoMaskr   r   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32r   r   create_node_namer   r   	make_node	attributeextendmake_attributeget_initializeradd_initializermake_tensorr   INT64add_node)r   r.   casted
input_name	cast_nodeoutput_namemask_index_node	axes_names           r   process_maskzAttentionMask.process_mask-   s-   2999D$$$##E** ::&&u-!%!E!Ee!LFJ$(JJ$B$B5$I!J	F&0DU# 2@@@&0DU# jj11,?"$.."|$ZZ00oN	O %%,,f.C.CFQC.PRXRgRghrtuRv-wx 6Izz)))4<

**&&&"-"3"3SS! %.."I.$ZZ00oN	O %%,,f.C.CJPQ.R-ST

O,"-r!   N)__name__
__module____qualname____doc__r   r    r	   r#   r'   r-   strrU    r!   r   r   r      s=    7i 7'+> ',
,8# 8# 8r!   r   c            )           e Zd ZdZdddddgfdededed	ee   d
edede	e
   f fdZdedeeef   fdZdedeeef   fdZdefdZde
fdZde
de
de
fdZde
de
de
e
ffdZde
de
de
fdZde
de
fdZded eedf   d!eedf   d"e
deedf   f
d#Zd$ed%ed&eded eedf   d!eedf   dedeedf   fd'Z	 	 	 	 	 	 	 d5d$ed%eee
df   d&eee
df   ded eedf   d!eedf   deded(e
d)e
de
de
de
d*e
d+e
d,edeedf   f"d-Z	 	 	 	 	 	 	 d6d.e
d$ed%ed&eded ed!ededed/e
d(e
d0e
de
de
d*e
d+e
d1ee   d2edeedf   f&d3Zd4 Z xZS )7FusionAttentionr   NFSkipLayerNormalizationLayerNormalizationr   hidden_size	num_headsattention_maskuse_multi_head_attention!disable_multi_head_attention_biassearch_op_typesc                     |rdnd}t         	|   |||       || _        || _        |r|n
t	        |      | _        || _        || _        d | _        d| _	        d| _
        d | _        d| _        y )NMultiHeadAttention	AttentionT)superr    r`   ra   r   rb   rc   rd   mask_filter_valuenum_heads_warninghidden_size_warningshape_infershape_infer_done)
r   r   r`   ra   rb   rc   rd   re   attention_op_name	__class__s
            r   r    zFusionAttention.__init__m   s     5M0R] 1?C&"0>nMRWDX(@%1R.!% "&#'  $r!   concatr/   c                    t        |j                        dk(  r| j                  j                  |j                  d         }| j                  j                  |j                  d         }t	        |t
        j                        rH|j                  dk(  r9t	        |t
        j                        r|j                  dk(  r|d   |d   |d   z  fS | j                  | j                  fS )aU  
        Detect num_heads and hidden_size from Concat node in the following subgraph:

        SkipLayerNormalization or EmbedLayerNormalization
                        /        |
                     MatMul    Shape
                        |        |
                       Add     Gather(indices=0)
                        |        |
                        |      Unsqueeze
                        |        |
                        |     Concat (*, -1, 12, 64)
                        |     /
                       Reshape
                          |
                       Transpose
                 r9   r   )
r*   r.   r   get_constant_value
isinstancenpndarraysizera   r`   )r   rq   ra   	head_sizes       r   )get_num_heads_and_hidden_size_from_concatz9FusionAttention.get_num_heads_and_hidden_size_from_concat   s    $ v||!

55fll1oFI

55fll1oFI9bjj1NNa'y"**5NNa' |Yq\IaL%@@@~~t////r!   	reshape_qc                    | j                   j                  |j                  d         }|{| j                   j                  |d      }| |j                  dk(  r| j                  |      S t        j                  |j                  d    d       | j                  | j                  fS t        j                  |      }t        |      dk7  s|d   dk  s|d   dk  r1t        j                  d| d	       | j                  | j                  fS |d   }|d   }||z  }| j                  dkD  rH|| j                  k7  r9| j                  r-t        j                  d
| j                   d| d       d| _        | j                  dkD  rH|| j                  k7  r9| j                  r-t        j                  d| j                   d| d       d| _        ||fS )zDetect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
        r9   Concatz is not initializer.rs   rt   r   ru   zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   rJ   r.   
get_parentop_typer|   loggerdebugra   r`   r   to_arrayr*   rk   warningrl   )r   r}   q_shaperq   q_shape_valuera   r{   r`   s           r   get_num_heads_and_hidden_sizez-FusionAttention.get_num_heads_and_hidden_size   s    **,,Y__Q-?@?ZZ**9a8F!fnn&@EEfMMLLIOOA.//CDE>>4#3#333#,,W5}"}Q'71'<a@PTU@ULL>-8opq>>4#3#333!!$	!!$	)+>>A)t~~"=%%0@@TU^T__vwx).&aK43C3C$C'''(8(8'99Mk]Zqr ,1(+%%r!   add_qkc                    | j                   s(| j                  j                  d      | _        d| _         | j                  y | j                  j	                  |j
                  d         }| j                  j	                  |j
                  d         }||t        j                  d| d       y ||k7  rt        j                  d| d       y |j
                  d   S )	NT)updater   r9   zone of the inputs of z is Nonezthe shape of two inputs of z is not same)rn   r   infer_runtime_shaperm   get_edge_shaper.   r   r   )r   r   input_0_shapeinput_1_shapes       r   get_add_qk_strzFusionAttention.get_add_qk_str   s    $$#zz==T=JD$(D!#((77QH((77QH M$9LL0ABM)LL6vhlKL||Ar!   c                    |dz   t        t        fd| j                              }t        |      dk(  rS t        |      dk(  sJ | j                  j                  d      }t        j                  dt        | j                        D cg c]  }| c}g|d      }| j                  j                  |       | j                  | j                  |<   S c c}w )N_maskc                 (    | j                   d   k(  S r)   )output)nodemask_output_names    r   <lambda>z0FusionAttention.reshape_add_qk.<locals>.<lambda>   s    t{{1~AQ/Qr!   r9   r   r   r5   r6   r7   axis)listfilternodes_to_addr*   r   rE   r   rF   rangera   appendthis_graph_namenode_name_to_graph_name)r   r   concat_nodeconcat_node_name_concat_add_qk_fp32r   s         @r   reshape_add_qkzFusionAttention.reshape_add_qk   s     "G+ 6"QSWSdSdef{q ##;1$$$::66x@#--$)$..$9:$9qF$9:%&!
 	  !349=9M9M$$%56 ;s   	C
past_kpast_vc                 F   | j                   j                  d      }| j                   j                  d      }|dz   j                  dd      }|dz   j                  dd      }t        j                  d|g|g|dg      }t        j                  d|g|g|dg      }| j
                  j                  |       | j
                  j                  |       | j                  | j                  |<   | j                  | j                  |<   | j                   j                  d      }	|j                  dd	      j                  dd      j                  d
d      }
t        j                  d||g|
g|	d      }| j
                  j                  |       | j                  | j                  |	<   |
S )zConcatenate past_k and past_v inputs to create past_kv input.

        Args:
            past_k (str): name of past K value
            past_v (str): name of past V value

        Returns:
            kv_output_name (str): name of past KV value
        	Unsqueeze_5d.r   r   )r5   r6   r7   r8   r   z.valuez.kv_value_kvr   )	r   rE   replacer   rF   r   r   r   r   )r   r   r   unsqueeze_k_nameunsqueeze_v_name	k_5d_name	v_5d_namek_5dv_5dr   kv_output_name	concat_kvs               r   r   zFusionAttention.concat_kv   s     ::66{C::66{Ce^,,S#6	e^,,S#6	8K!
 8K!
 	  &  &9=9M9M$$%569=9M9M$$%56  ::66x@%8@@cJRRS[]bc$$y)#$!
	 	  +9=9M9M$$%56r!   c                    d}| j                   j                  |      }|it        j                  t	        j
                  dd| j                   j                  gd      |      }| j                   j                  || j                         | j                   j                  d      }| j                   j                  d      }|dz   j                  d	d
      }|dz   j                  d	d
      }t        j                  d||g|g|      }	t        j                  d||g|g|      }
| j                  j                  |	       | j                  j                  |
       | j                  | j                  |<   | j                  | j                  |<   ||fS )ah  Reshape past_k and past_v from 4D to 3D to use as inputs for multihead attention node.

        Args:
            past_k (str): name of past K value of shape 4D
            past_v (str): name of past V value of shape 4D

        Returns:
            k_3d (str): name of past K value of shape 3D
            v_3d (str): name of past V value of shape 3D
        kv_4d_to_3dr   int64dtyper7   Reshape_3dr   r   r4   )r   rJ   r   
from_arrayrx   arrayr`   rK   r   rE   r   r   rF   r   r   r   )r   r   r   new_dims_namenew_dimsreshape_k_namereshape_v_name	k_3d_name	v_3d_namek_3dv_3ds              r   
reshape_kvzFusionAttention.reshape_kv4  sm    &::--m<#..!R!7!78H}H JJ&&x1E1EF44Y?44Y?e^,,S#6	e^,,S#6	M*K	
 M*K	
 	  &  &7;7K7K$$^47;7K7K$$^4)##r!   present_k_namepresent_v_namekv_nodec                 z   d\  }}| j                   j                  |      }| j                   j                  |      }|Rt        j                  t	        j
                  dd      |      }| j                   j                  || j                         |Rt        j                  t	        j
                  dd      |      }| j                   j                  || j                         | j                   j                  d      }| j                   j                  d      }	t        j                  d||g|g|d	      }
t        j                  d||g|g|	d	      }| j                  j                  |
       | j                  j                  |       | j                  | j                  |<   | j                  | j                  |	<   y)
a?  Split kv_node containing present KV values into separate present K and present V values.

        Args:
            present_k_name (str): name of output to store present K value in
            present_v_name (str): name of output to store present V value in
            kv_node (str): name of present KV values
        )index_0index_1Nr   r   r   r   r9   Gatherr   )r   rJ   r   r   rx   r   rK   r   rE   r   rF   r   r   r   )r   r   r   r   k_indexv_indexk_dimv_dimgather_k_namegather_v_name	present_k	present_vs               r   split_kvzFusionAttention.split_kve  s~    0

**73

**73= ++BHHQg,FWUEJJ&&ud.B.BC= ++BHHQg,FWUEJJ&&ud.B.BC 

33H=

33H=$$W%#$
	 $$W%#$
	 	  +  +6:6J6J$$]36:6J6J$$]3r!   c                    |dz   j                  dd      }|dz   j                  dd      }| j                  j                  d      }| j                  j                  d      }t        j                  d|g|g|g d      }t        j                  d|g|g|g d      }| j
                  j                  |       | j
                  j                  |       | j                  | j                  |<   | j                  | j                  |<   ||fS )a}  Transpose past_k and past_v from (B,N,P,H) to (B,P,N,H)

        Args:
            past_k (str): name of past K value of shape (B,N,P,H)
            past_v (str): name of past V value of shape (B,N,P,H)

        Returns:
            past_k_transpose (str): name of past K value of shape (B,P,N,H)
            past_v_transpose (str): name of past V value of shape (B,P,N,H)
        _transposedr   r   	Transpose)r   rt   r9   ru   )r5   r6   r7   perm)	r   r   rE   r   rF   r   r   r   r   )	r   r   r   past_k_transposepast_v_transposetranspose_k_nametranspose_v_nametranspose_ktranspose_vs	            r   transpose_kvzFusionAttention.transpose_kv  s    #]2;;CE"]2;;CE::66{C::66{C&&8%&!
 &&8%&!
 	  -  -9=9M9M$$%569=9M9M$$%56!111r!   q_addk_addv_addname_prefixc                    | j                   j                  |j                  d         xs( | j                   j                  |j                  d         }t        j                  |      }t        j                  |      }t        j                  |      }|g| j                   j                  |j                  d         xs( | j                   j                  |j                  d         }	t        j                  |	      }|g| j                   j                  |j                  d         xs( | j                   j                  |j                  d         }
t        j                  |
      }t        j                  |||fd      }dt        j                  |j                        z  }|dz   }| j                  ||j                  |g|       |S )Nr9   r   r   ru   	_qkv_biasr7   r=   r>   r?   )r   rJ   r.   r   r   rx   
zeros_likestackprodshaperK   r=   )r   r   r   r   r   q_biasqbkbvbk_biasv_biasqkv_biasqkv_bias_dim	bias_names                 r   create_combined_qkv_biasz(FusionAttention.create_combined_qkv_bias  sy    ++EKKN;itzz?Y?YZ_ZeZefgZh?i!!&)]]2]]2ZZ//A?m4::C]C]^c^i^ijk^lCmF%%f-BZZ//A?m4::C]C]^c^i^ijk^lCmF%%f-B88RRLq1277288,,+-	&&	 	 	
 r!   q_matmulk_matmulv_matmulc                 6   | j                   j                  d      }|j                  d   |j                  d   k(  r|j                  d   |j                  d   k(  sJ | j                   j                  |j                  d         }	| j                   j                  |j                  d         }
| j                   j                  |j                  d         }t	        j
                  |	      }t	        j
                  |
      }t	        j
                  |      }|j                  |j                  k(  r|j                  |j                  k(  sJ |j                  d   }t        j                  |||fd      j                  |d|z  f      }|dz   }| j                  ||	j                  |j                  d   |j                  d   g|       |dz   }t        j                  d|j                  d   |g|g|	      }| j                  | j                  |<   |g}|d
z   }| j                  |t         j"                  dgdgd       |dz   }| j                  |t         j"                  dg|gd       |dz   }| j                  |t         j"                  dgd|z  gd       |dz   }| j                  |t         j"                  dgd|z  gd       |dz   }| j                  |t         j"                  dgdgd       |dz   }t        j                  d||||g|g| j                   j                  d      	      }| j                  | j                  |j$                  <   |dz   }t        j                  d||||g|g| j                   j                  d      	      }| j                  | j                  |j$                  <   |dz   }t        j                  d||||g|g| j                   j                  d      	      }| j                  | j                  |j$                  <   |} |}!|}"|j'                  |||g       | j(                  rL|| j                   j                  |j                  d         rdnd}#t        j*                  t	        j
                  | j                   j                  |j                  |#                     rH||j                  d|#z
  <   |} |j-                  |       | j                  | j                  |j$                  <   || j                   j                  |j                  d         rdnd}#t        j*                  t	        j
                  | j                   j                  |j                  |#                     rH||j                  d|#z
  <   |}!|j-                  |       | j                  | j                  |j$                  <   || j                   j                  |j                  d         rdnd}#t        j*                  t	        j
                  | j                   j                  |j                  |#                     rH||j                  d|#z
  <   |}"|j-                  |       | j                  | j                  |j$                  <   | j.                  j'                  |       | |!|"fS )a  Create packed QKV MatMul node before MultiHeadAttention node.
           This is for the scenario where an Attention node should be created but cannot be created
           because past_key and past_value are separate inputs and not one concatenated input.

        Args:
            q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
            k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size)
            v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size)
            q_add (NodeProto): name of Add from Q path
            k_add (NodeProto): name of Add from K path
            v_add (NodeProto): name of Add from V path
            num_heads (int): number of heads

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        MatMulr   r9   r   ru   _qkv_weightr   _qkv_outr4   _q_start_indexFr<   _k_start_index_v_start_indexrt   _end_of_qkv_index_qkv_last_axisr   _q_outSlice_k_out_v_out)r   rE   r.   rJ   r   r   r   rx   r   reshaperK   r=   r   rF   r   r   r   rM   r7   rH   rd   anyr   r   )$r   r   r   r   r   r   r   ra   matmul_node_nameq_weightk_weightv_weightqwkwvwd
qkv_weightqkv_weight_nameqkv_matmul_output
qkv_matmul	qkv_nodesq_slice_namek_slice_namev_slice_nameend_of_qkv_nameqkv_last_axis_nameq_slice_outputq_slicek_slice_outputk_slicev_slice_outputv_sliceq_outputk_outputv_outputinitializer_inputs$                                       r   create_packed_qkv_matmul_nodez-FusionAttention.create_packed_qkv_matmul_node  s$   4  ::66x@ ~~a HNN1$55(..:Kx~~^_O`:``` ::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@!!(+!!(+!!(+xx288#BHH(<<<HHQKXXr2rl3;;QAJG
*]: ((""1%z'7'7':;	 	 	
" -z9%%NN1%7&'!	

 :>9M9M$$%56L	 (*::,+:K:KSTRU]^\_ejk'*::,+:K:KSTRU]^\_ejk'*::,+:K:KSTRU]^ab]b\cino*-@@/[=N=NVWUX`ade`e_flqr-0@@"4@Q@QYZX[cebflqr)H4""%|\CUV#$,,W5	
 6:5I5I$$W\\2)H4""%|\CUV#$,,W5	
 6:5I5I$$W\\2)H4""%|_FXY#$,,W5	
 6:5I5I$$W\\2'7G4511 )-)C)CEKKPQN)SAYZ!66+..tzz/I/I%++VgJh/ijk9GEKK$5 56$H$$U+?C?S?SD00< )-)C)CEKKPQN)SAYZ!66+..tzz/I/I%++VgJh/ijk9GEKK$5 56$H$$U+?C?S?SD00< )-)C)CEKKPQN)SAYZ!66+..tzz/I/I%++VgJh/ijk9GEKK$5 56$H$$U+?C?S?SD00< 	  +8++r!   r   key_padding_maskr   r   
packed_qkvc           	         |dkD  sJ |dkD  r$||z  dk7  rt         j                  d| d|        yt        | j                  j	                         j
                  D cg c]  }|j                   c}      }| j                  j                  d      }g }|rX| j                  |||||||      \  }}}|j                  |j                  d   |j                  d   |j                  d   g       n#t        |      t        u rt        |      t        u r| j                  r<|j                  |j                  d   |j                  d   |j                  d   g       n|j                  |j                  d   |j                  d   |j                  d   g       n}t        |      t        k(  rjt        |      t        k(  rX||v rT||v rP| j                  r"|j                  |j                  d   ||g       n#|j                  |j                  d   ||g       ny| j                  s&| j                  ||||      }|j!                  |       n|j!                  d       |r|r|j                  |
|||g       n|
s|r|j                  |
|g       |	g}|r|r|j                  ||g       t#        j$                  d|||      }d	|_        |j(                  j                  t#        j*                  d
|      g       |S c c}w )a[  Create a MultiHeadAttention node.

        Args:
            q_matmul (NodeProto): name of MatMul from Q path - (batch_size, sequence_length, hidden_size)
            k_matmul (NodeProto): name of MatMul from K path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
            v_matmul (NodeProto): name of MatMul from V path - (batch_size, sequence_length, hidden_size) or (batch_size, num_heads, past_sequence_length, head_size)
            q_add (NodeProto): name of Add from Q path
            k_add (NodeProto): name of Add from K path
            v_add (NodeProto): name of Add from V path
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name of MHA
            key_padding_mask (str): name of key padding mask
            add_qk (str): name of add after Q x K'
            past_k (str): name of past K value - (batch_size, num_heads, past_sequence_length, head_size)
            past_v (str): name of past V value - (batch_size, num_heads, past_sequence_length, head_size)
            present_k (str): name of present K value - (batch_size, num_heads, sequence_length, head_size)
            present_v (str): name of present V value - (batch_size, num_heads, sequence_length, head_size)
            packed_qkv (bool): whether to combine MatMuls from Q, K, V paths
                               Note: This is for the scenario where an Attention node should be created but cannot be created
                               because past_key and past_value are separate inputs and not one concatenated input.

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   input hidden size # is not a multiple of num of heads Nrh    rg   r4   com.microsoftra   )r   r   setr   graphr.   r7   rE   r*  rH   r   typer   rd   rZ   r   r   r   rF   domainrG   rI   )r   r   r   r   r   r   r   ra   r`   r   r+  r   r   r   r   r   r,  r   graph_input_namesmha_node_name
mha_inputsr!  r#  r%  r   mha_outputsmha_nodes                              r   create_multihead_attention_nodez/FusionAttention.create_multihead_attention_nodef  s   Z 1}}?i 7A=LL-k]:]^g]hijtzz7G7G7I7O7O P7Ot7O PQ

33K@ 
(,(J(J(HeUE9)%GWg w~~a0'..2CW^^TUEVWX(^y(T(^y-H55!!5<<?HOOA4FUV"XY!!8??1#5xq7I8??[\K]"^_Nc!X#%----55!!5<<?Hh"GH!!8??1#5x"JK 5555eUE=YIi(b! f/HI/89 h	956## 	
 *!!6#8#8i#P"QRu !Qs   K0r&   r.   
add_qk_strscalecausalc                    |dkD  sJ |	dkD  r$|	|z  dk7  rt         j                  d|	 d|        yd}|||d}| j                  j                  |j                  d         }| j                  j                  |j                  d         }| j                  j                  |j                  d         }d\  }}}|r| j                  j                  |j                  d         xs( | j                  j                  |j                  d         }| j                  j                  |j                  d         xs( | j                  j                  |j                  d         }| j                  j                  |j                  d         xs( | j                  j                  |j                  d         }|r|r|r|sy|t        |j                  d    d	       yt        j                  |      }t        j                  |      }t        j                  |      }|j                  |j                  k(  sJ |j                  d   }|j                  d   }|j                  d   }||cxk(  r|k(  sJ  J |	dkD  r!|	|k7  rt         j                  d
|	 d| d       d} |j                  |j                  k7  rd} t        j                  |j                  dd       }!t        j                  |j                  dd       }"t        j                  |j                  dd       }#d}$| r#t        j                  |||fd      }%|!|"z   |#z   }$nt        j                  |||fd      }%d|!z  }$|rt        j                  |      }&t        j                  |      }'t        j                  |      }(t        j                  |&j                        })t        j                  |'j                        }*t        j                  |(j                        }+|)|*cxk(  r|!k(  sJ  J |+|#k(  sJ | r#t        j                  |&|'|(fd      },|)|*z   |+z   }-nt        j                  |&|'|(fd      },d|)z  }-| j                  j                  d      }.| j                  s$| j!                  |.dz   |j"                  ||$g|%       |r#| j!                  |.dz   |j"                  -g,       | j                  rw|rt         j                  d       y|j$                  d   |j$                  d   |j$                  d   |.dz   g}/||/j'                  |       t)        j*                  d|/|g|.      }0n|
|.dz   |r|.dz   ndg}/||/j'                  |       n|/j'                  d       |xr |}1|1r#| j-                  ||      }2|/j'                  |2       |5| j/                  |      }3|1s|/j'                  d       |/j'                  |3       |g}4|rX|rV|j1                  dd      j1                  dd      j1                  dd      }5|4j'                  |5       | j3                  |||5       t)        j*                  d|/|4|.      }0d|0_        |0j6                  j9                  t)        j:                  d|      g       |r0|0j6                  j9                  t)        j:                  dd      g       |0|0j6                  j9                  t)        j:                  d|      g       | r3|0j6                  j9                  t)        j:                  d|!|"|#g      g       | j<                  C|0j6                  j9                  t)        j:                  d t?        | j<                              g       |0S )!a+  Create an Attention node.

        Args:
            mask_index (str): mask input
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            q_add (NodeProto): Add bias node in fully connection for Q
            k_add (NodeProto): Add bias node in fully connection for K
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name
            add_qk_str (str): name of Add node after Q x K'
            past_k (str): name of input for past K value
            past_v (str): name of input for past V value
            present_k (str): name of output to store present K value
            present_v (str): name of output to store present V value
            scale: scale before softmax
            causal: whether it is uni-directional mask.

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   r.  r/  NTFr9   )NNNzl is not an initializer. Please set do_constant_folding=True in torch.onnx.export to unblock attention fusionzInput hidden size (z3) is not same as weight matrix dimension of q,k,v (z:). Please provide a correct input hidden size or pass in 0r   ru   rh   r  r   r   zVMultiHeadAttention does not support relative_position_bias: cannot fuse the attention.rg   r4   r0  z.key_keyr   r   r1  ra   unidirectionalr=  qkv_hidden_sizesrj   ) r   r   r   rJ   r.   printr   r   r   r   rx   r   concatenater   rE   rc   rK   r=   r   r   r   rF   r   r   r   r   r5  rG   rH   rI   rj   float)6r   r&   r   r   r   r   r   r   ra   r`   r.   r   r<  r   r   r   r   r=  r>  has_biasr  r  r  r   r   r   r  r  r  
qw_in_size
kw_in_size
vw_in_sizeis_qkv_diff_dimsqw_out_sizekw_out_sizevw_out_sizeqkv_weight_dimr  r   r   r   q_bias_shapek_bias_shapev_bias_shaper   r   attention_node_nameattention_inputsattention_nodepast_existspast_kvr   attention_outputs
present_kvs6                                                         r   create_attention_nodez%FusionAttention.create_attention_node  s   \ 1}}?i 7A=LL-k]:]^g]hij=U]u}H::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@!1ZZ//A?m4::C]C]^c^i^ijk^lCmFZZ//A?m4::C]C]^c^i^ijk^lCmFZZ//A?m4::C]C]^c^i^ijk^lCmFf>>!$% &g g !!(+!!(+!!(+ xx288###XXa[
XXa[
XXa[
Z5:55555?{j8NN%k]2efpeq rJ J
 !88rxx#
 ggbhhqrl+ggbhhqrl+ggbhhqrl+R1=J(;6DN2r2,Q7J_N%%f-B%%f-B%%f-B77288,L77288,L77288,L<>;>>>>>;...>>2r2,Q?+l:\I88RRLq9 </"jj99+F,,  (=8",, .1	 !    (;6 **"^	 !  ((uv """#k1	  % ''
3#--$'(	N #m35=#k12 
 % ''
3 ''+ +VK..8 ''0%#'#6#6z#B  #$++B/ ''(89!'Y&..vr:BB62NVVWZ\_`
!((4iJ?#--')(	N !0  '')>)>{I)V(WX$$++V-B-BCSUV-W,XY$$++V-B-B7E-R,ST$$++&&'9KVa;bcd !!-$$++V-B-BCVX]^b^t^tXu-v,wxr!   c                    |}|j                   dk(  r#| j                  j                  |dd      }||}ny | j                  j                  |g dg d      }d }|	|\  }}}	}
}n,| j                  j                  |g dg d      }||\  }}}
}ny g }t	        |j
                        D ]1  \  }}||vr||d   j                  d   k(  r!|j                  |       3 t        |      dk7  ry |d   }	 | j                  j                  |d	d      }|h||j                  d      }|3t        |      d
k(  r%|d   }|j                   dk(  r|j                  d   }n\y |t        |      dk(  r|j                  d   }n;y |j                   dk(  r+||   }|D ]!  }|j                   dk(  s|j                  d   }# 	 ||   }|j                   dk(  r't        |j                        dk(  r|j                  d   }||   }|D cg c]  }|j                    }}|j                  d      dk7  ry | j                  j                  |g dg d      }|t        j                  d       y |\  }}}}d}d}d}g dg dfg dg dfg dg dfg dg dfg dg dfd}d }|j                         D ]A  \  }} | j                  j                  || d   | d         }|,|dk(  rd}|d k(  rd}|d!k(  rd} n |t        j                  d"       y d }!d }"d }#|r|\  }}#}"}n|r|\  }}!}#}"n|r|\  }}}"n|\  }}!}}"| j                  j                  |"g dg d#      }$|$9| j                  j                  |"g d$g d%      }$|$t        j                  d&       y |$d'   }%|$d(   }&|$d)   }'| j                  j                  |"g dg d      }(|(9| j                  j                  |"g d*g d+      }(|(t        j                  d,       y |(d(   })|(d)   }*d }+d },|r7| j                  j                  |#g d-g dfg d.g dfg d/g d0fg|      \  }}+}n|r]| j                  j                  |#g d1g d0fg d.g dfg|      \  }}+}|!^| j                  |!      },|,Kt        j                  d2|!        y |rn/| j                  j                  |!g d3g d4fg d5g d6fg|      \  }}+}|s|+t        j                  d7       y |sMt        |+      dkD  r?|+d   j                   d	k(  r-| j                  j                  |+d         \  }}-|-d8k7  r|-| _        |j
                  d   |k(  r|'j
                  d   |k(  ru|*j
                  d   |k(  ra|s+| j"                  j%                  |+d)   j
                  d         nd }.|	n|
}/| j'                  |%      \  }0}1|0dk  s|1dk  rt        j)                  d9       y | j+                  |.|'|*||&|)||0|1||/j                  d   |,      }2|2y | j,                  j                  |2       | j.                  | j0                  |2j2                  <   ||j
                  d   }3d:|3z   }4| j5                  d;|3z   t6        j8                  dgt;        j<                  dd|0t?        |1|0z        g      d<      }5| j                  jA                  tC        jD                  d=|/j                  d   |5j2                  g|4gd>|3z         | j.                         |4|j
                  d<   | jF                  jI                  |/|
|g       | jF                  jI                  |       | jF                  jI                  | jJ                  s|$n|$d d)        | jF                  jI                  | jJ                  s|(n|(d d)        | jF                  jI                  | jJ                  s|n|d d)        d| _&        y y y y c c}w )?Nr_   Addr   )r[  r   r   r   r   )NNr   r   r   )r[  Einsumr   r   )r9   Nr   r   r9   Mulrt      r^   rs   r   ru   )r   r   r[  r   )r9   r   r   Nz&fuse_attention: failed to match v pathF)Softmaxr[  Divr   )r   r   Nr   )r_  r[  r]  r   )r_  Wherer   r`  )r   r   rt   r   )r_  r[  ra  r   )r   r   r   rt   )r_  r`  r   )r   r   r   )path1path2path3path4path5rd  Tre  rf  z'fuse_attention: failed to match qk path)r   r   r   N)r`  r   r   r[  r   )r   r   r   r   Nz&fuse_attention: failed to match q pathr   )r   r   r   r[  r   )r9   r   r   r   Nz&fuse_attention: failed to match k path)Expandr   Equal)rj  r   r   )Castri  r   rj  )r   r   r   r   )rk  rj  r   r   z4fuse_attention: failed to verify shape inference of )r]  Subrk  r   r   )Nr   r9   r   r   )r]  rl  r   r   )Nr   r9   r   z)fuse_attention: failed to match mask pathizmFailed to detect num_heads and hidden_size for Attention fusion. Please specify those parameters in argument.edge_modified_shape_modified_tensorr<   r   reshape_modified_)'r   r   match_parentmatch_parent_path	enumerater.   r   r   r*   countr   r   itemsmatch_parent_pathsr   get_constant_inputrj   rb   rU   r   r   rY  r   r   r   r7   rK   r   rM   rx   r   intrN   r   rF   nodes_to_removerH   rc   prune_graph)6r   normalize_nodeinput_name_to_nodesoutput_name_to_node
start_nodeadd_before_layernormr  einsum_noder   reshape_qkvtranspose_qkv
matmul_qkvother_inputs_ir.   
root_inputmul_before_layernormmul_childrenlayernorm_nodechildrenchildparent_nodechildren_typesv_nodesadd_vmatmul_v
is_distillis_distill_addis_no_mask_attentionqk_pathsqk_nodeskvr   	matmul_qkwhere_qkq_nodesr}   add_qmatmul_qk_nodesadd_kmatmul_k
mask_nodesr<  mul_valr&   attention_last_nodeq_num_headsq_hidden_sizenew_nodeunique_indexnew_edgeshape_tensors6                                                         r   fusezFusionAttention.fuse  s    $
!!%99#'::#:#:>5RS#T #/1
 JJ00?!
	
  =F:Q;z 

44DoI $>G;K
":#3#34IB//	!++A..& 5 |!!!_
	  $zz66z5!L+./C/J/J1/MNL'C,=,B!-a!))-AA!/!6!6q!9J)c,.?1.D188;
##';;*:6H!==$88!&aJ "	 **5"::s;CUCU?VZ[?[$++A.J&z25=>XE%--X>)Q.**..z;dfuv?LLAB")Auh
$9?K9?K;\J;\J2I>
 NN$DAqzz33J!adKHG|!
G|!%G|'+$ % LLBC	*2'Q)Q/7,Q)! (Q9(0%Q9**..y:cetu?jj22@"G
 EFBK	2;**..y:cetu?jj22F"G
 EF2; 

#zz<<3Y?8)D;\J
 $ Az1 #zz<<@,O8)D $ Az1 !!008
%LL#WX^W_!`a!#zz<< I* >O $
 Az1 $
(:LLDE#J!(;
1@U@UY^@^66z!}EJAw& )0&>>!
*x~~a/@J/NS[SaSabcSdhrSrZn,,99*R.:N:Nq:QRtxJ1<1D+-)-)K)KI)V&Ka=A#5C  11#**1-H $$X.:>:N:ND((7&*003+l:#330<?)//1ac-+:U6V"WX  4   

##$$!,33A68I8IJ!
+l:	 (( (0!!!$  '')<mZ(XY  ''1   ''t7T7TZabeceZfg  ''t7T7TZabeceZfg  ''t7T7TZabeceZfg  $DG Ts/N* ?s   5_)r0  r0  r0  r0  r0  r0  F)r0  r0  r0  r0  r0  NF)rV   rW   rX   rY   r   rw  r   r   boolr   rZ   r    r   r   r|   r   r   r   r   r   r   r   r   r   r*  r;  rE  rY  r  __classcell__)rp   s   @r   r]   r]   h   s     37).27&>@T%U%% % 	%
 !/% #'% ,0% c%40	 0eTWY\T\o 0>'&y '&U3PS8_ '&RY * S  25 5S 5S 5n/$ /$c /$sCj /$b+Ks +KC +K# +KZ%23 %2 %2N Y_% Y_%	
  
y$	<M,M, M, 	M,
 M, Y_%M, Y_%M, M, 
y$	M,t !# #mm 	3,-m 	3,-	m
 m Y_%m Y_%m m m m m m m m m  !m" #m$ 
y$	%mx !%'cc c 	c
 c c c c c c c c c c c  !c" #c$ %c& 'c( 
y$	)cJY$r!   r]   )loggingr   typingr   r   r   r   numpyrx   fusion_baser   fusion_optionsr	   fusion_utilsr
   r   onnxr   r   r   r   
onnx_modelr   rV   r   r   r]   r[   r!   r   <module>r     sJ   
  / /   . 1 = =  	8	S Slk$f k$r!   