
    ge                        d dl Z d dlmZmZ d dlmZ d dlmZ d dlm	Z	m
Z
mZmZmZ d dlmZ  e j                   e      Z G d de      Z G d	 d
e      Zy)    N)OptionalUnion)FusionAttention)Fusion)FunctionProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                        e Zd ZdZdededef fdZ	 	 	 	 	 	 	 ddeded	ed
edededededededede	e
   deedf   fdZd Zd Zd Z xZS )FusionRotaryAttentionze
    Fuse Attention subgraph with rotary positional embeddings into one MultiHeadAttention node.
    modelhidden_size	num_headsc                 2    t         |   |||dg d       y )NT)SimplifiedLayerNormalization SkipSimplifiedLayerNormalizationLayerNormalizationSkipLayerNormalizationAdd)use_multi_head_attentionsearch_op_types)super__init__)selfr   r   r   	__class__s       e/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_rotary_attention.pyr   zFusionRotaryAttention.__init__   s(     	%) 	 	
    Ninputoutputq_rotaryk_rotaryv_matmul	attn_maskadd_qkpast_kpast_v	present_k	present_vscalereturnc                    | j                   dkD  sJ | j                  dkD  rL| j                  | j                   z  dk7  r0t        j                  d| j                   d| j                           y | j                  j                  d      }|j                  d   |j                  d   |j                  d   d||||	g}|g}|
r|r|j                  |
|g       t        j                  d|||      }d|_
        |j                  j                  t        j                  d| j                         g       |0|j                  j                  t        j                  d	|      g       | j                  C|j                  j                  t        j                  d
t        | j                              g       | j                  d       |S )Nr   z)fuse_rotary_attention: input hidden size z# is not a multiple of num of heads MultiHeadAttention inputsoutputsnamecom.microsoftr   r+   mask_filter_value)r   r   loggerdebugr   create_node_namer!   extendr
   	make_nodedomain	attributemake_attributer5   floatincrease_counter)r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   mha_node_name
mha_inputsmha_outputsmha_nodes                    r   create_mha_nodez%FusionRotaryAttention.create_mha_node)   s    ~~!!!aT%5%5%F1$LLL;D<L<L;MMpquqq  qA  B 

334HIOOAOOAOOA	

 h	956## 	
 *!!6#8#8dnn#U"VW%%v'<'<We'L&MN!!-%%v'<'<=PRWX\XnXnRo'p&qr23r   c	                 
   | j                   j                  |dgdg      }	| j                   j                  |dgdg      }
|	|
y|	d   |
d   }}| j                   j                  |g dg d      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||||y|\  }}}|\  }}}|j                  d   |k7  s|j                  d   |k7  ry|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }| j                   j                  |dgdg      }||y|d   |d   }}| j                   j                  |g d	g d
      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||||y|d   j                  |j                  k7  sT|d   j                  |j                  k7  s8|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g d	g d
      } | j                   j                  |g dg d      }!| |!y| d   j                  |j                  k7  s|!d   j                  |j                  k7  ry| j                   j                  |dgdg      }"|"y|"d   }#| j                   j                  |#g d	g d
      }$| j                   j                  |#g dg d      }%|$|%y|$d   j                  |j                  k7  s|%d   j                  |j                  k7  ry|$d   }&| d   }'|d   }(|j                  d   })|&j                  d   |)k7  s$|'j                  d   |)k7  s|(j                  d   |)k7  ry| j                   j                  |g dg d      }*| j                   j                  |g dg d      }+|*|*\  }},}-n|+|+\  }}},}-ny|-j                  d   dvry| j                   j                  |,g dg d      }.| j                   j                  |-g dg d      }/| j                   j                  |-dgdg      }0|.|/|0y|.d   j                  |/d   j                  k7  s|.d   j                  |/d   j                  k7  ry|/d   j                  d   |0d   j                  d   k7  ryy)NConcat   Fr   	UnsqueezeGatherShaper   r   r   rG   r   r   )   r   r   )rI   MulrJ   rK   r   r   r   r   )rI   r   rJ   rK   rG   r   r   r   rN   )rN   r   r   r   rF   SlicerS   CastrF   rS   rS   >   r%   attention_mask)rN   r   rG   r   rI   T)r   match_parent_pathr    r3   r!   )1r   reshape_qkv_2reshape_qkv_1reshape_q_2reshape_k_2reshape_v_2reshape_v_1r&   
root_inputconcat_qkv_2_pathconcat_qkv_1_pathconcat_qkv_2concat_qkv_1reshape_qkv_2_path_1reshape_qkv_2_path_2reshape_qkv_1_path_1reshape_qkv_1_path_2_gather_1shape_1gather_2shape_2concat_v_2_pathconcat_v_1_path
concat_v_2
concat_v_1reshape_v_2_path_1reshape_v_2_path_2reshape_v_1_path_1reshape_v_1_path_2concat_k_2_path
concat_k_2reshape_k_2_path_1reshape_k_2_path_2concat_q_2_path
concat_q_2reshape_q_2_path_1reshape_q_2_path_2mul_qmul_kmul_vgather_1_outattn_mask_path_1attn_mask_path_2
slice_qk_2
slice_qk_1slice_qk_2_pathslice_qk_1_path_1slice_qk_1_path_2s1                                                    r   &check_runtime_shape_paths_for_functionz<FusionRotaryAttention.check_runtime_shape_paths_for_functiona   s    !JJ88
UVTWX JJ88
UVTWX$(9(A%6q%9;LQ;Ol#zz;;LJjluv#zz;;LJjluv#zz;;LJjluv#zz;;LJjluv (#+#+#+38W38W ==z)W]]1-=-K  "''8==8<PQR<S<X<X\d\i\i<i **66{XJQRPST**66{XJQRPST"o&=!0!3_Q5GJ
!ZZ99?
 "ZZ99?
 "ZZ99*Ffhqr!ZZ99*Ffhqr&!)!)!) q!&&(--7!!$))X]]:!!$))X]]:!!$))X]]: **66{XJQRPST"$Q'
!ZZ99?
 "ZZ99?
 %);)C a %%6:LQ:O:T:TX`XeXe:e **66{XJQRPST"$Q'
!ZZ99?
 "ZZ99*Ffhqr%);)C a %%6:LQ:O:T:TX`XeXe:e #1%"1%"1%q);;q>\)U[[^|-Ku{{[\~amOm  ::77@\^gh::77@dfrs'(8%Az:)+;(Aq*jA&EE**66?
 !JJ88?
 !JJ88k]UVTWX"&7&?CTC\ 1""&7&:&?&???STCUCZCZ^opq^r^w^wCw Q%%a(,=a,@,F,Fq,IIr   c                 ,   | j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }	||	y|\  }
}}|	\  }
}}|j                  d   |k7  s|j                  d   |k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||y|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||y|d   j                  |j                  k7  s|d   j                  |j                  k7  ry| j                   j                  |dgdg      }|y|d   }| j                   j                  |g dg d      }| j                   j                  |g dg d      }||y|d   j                  |j                  k7  s|d   j                  |j                  k7  ryy)	NrF   rG   Fr   rH   rL   rM   T)r   rW   r    r3   )r   reshape_qkv	reshape_q	reshape_k	reshape_vr^   concat_qkv_path
concat_qkvreshape_qkv_path_1reshape_qkv_path_2rg   rh   ri   rj   rk   concat_v_pathconcat_vreshape_v_path_1reshape_v_path_2concat_k_pathconcat_kreshape_k_path_1reshape_k_path_2concat_q_pathconcat_qreshape_q_path_1reshape_q_path_2s                              r   #check_runtime_shape_paths_for_nodesz9FusionRotaryAttention.check_runtime_shape_paths_for_nodes   s    **66{XJQRPST"$Q'
!ZZ99*Ffhqr!ZZ99*Ffhqr%);)C18W18W ==z)W]]1-=-K 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8a 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8a 

44Y
QCP  #::77Bbdmn::77Bbdmn#'7'? A##x}}48H8K8P8PT\TaTa8ar   c                 |    |j                   dvry d } j                  j                  |g dg d      } j                  j                  |g dg d      } j                  j                  |g dg d      }||\  }}	}}
}|}n/|
|\  }}}}|}n#||\  }}}}}|}nt        j	                  d       y d\  }}}d }d } j                  j                  |g d	g d
      } j                  j                  |g dg d      } j                  j                  |g dg d      } j                  j                  |g dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg dg dfg	d       \  }}} j                  j                  |g d g d!      }|x|\  }}}}}}|} j                  j                  |d"d#gd$d%g      }|t        j	                  d&       y |d$   j                  d$   }|d'   j                  d$   }|j                  d$   }n|(|\  }}}}|}|j                  d$   }|j                  d$   }n||\  }}}|}|j                  d$   }n|<t        |      d(k(  r.|d$   d)d  \  }}}}|}|j                  d$   }|j                  d$   }nC|+|\  }}}}}|}|}|j                  d$   }|j                  d$   }nt        j	                  d*       y  j                  j                  |g d+g d,      }d-\  }} ||\  }}}} nt        j	                  d.       y d/\  }!}" j                  j                  |g d0g d      }# j                  j                  |g d1g d      }$ j                  j                  |g d2g d3      }% j                  j                  |g d4g d5      }& j                  j                  |g d6g d7      }' j                  j                  |g d8g d3      }( j                  j                  |g d9g d:      })|#|#\  }}*}+|*j                  d$   }!n|$|$\  }}}*}+|*j                  d$   }!n|%" j                  |%d$   j                  d$         }"n|&" j                  |&d$   j                  d$         }"nd|'|'d$   j                  d$   }"nO|(|(d$   j                  d$   }"n:|)" j                  |)d$   j                  d$         }"nt        j	                  d;       y d/\  },}-d }.d }/d }0 j                  j                  | g d<g d
      }1 j                  j                  | g d=g d      }2 j                  j                  | g d>g d?      }3 j                  j                  | g d@g d:fg dAg dBfg dCg dDfg dEg dFfg dGg dHfg dIg dJfg dKg dLfg dIg dMfg dIg dNfg	d       \  }}4} j                  j                  | g dOg dP      }5|1~|1\  }6}}7}}8}9|1}. j                  j                  |7d"d#gd$d%g      }:|:t        j	                  dQ       y |:d$   j                  d$   },|:d'   j                  d$   };|7j                  d$   }-||;k(  sJ |2|2\  }}8}}<}9|2}.|8j                  d$   }-n|3*|3\  }}7}8}}<}9|3}.|7j                  d$   },|7j                  d$   }-n|4Et        |4      d(k(  r7|4d$   dRd  \  }<}9|4d$   dSdT \  }7}8|4}.|7j                  d$   },|7j                  d$   }-nE|5-|5\	  }}7}0}8}/}}<}}9|5}.|7j                  d$   },|7j                  d$   }-nt        j	                  dU       y d }=d }>d }? j                  j                  | g dVg d,      }@ j                  j                  | g dWg d,      }A j                  j                  | g dXg dY      }B|@
@\  }C}}D}E|@}=n1A
A\  }D}}F}E|A}=n%BB\  }?}D}>}}F}}E|B}=nt        j	                  dZ       y Ej                  d$   |9j                  d$   k7  r5|9j                  d$   |j                  d$   k7  rt        j	                  d[       y d\}G||k(  rK j                  	
C6|Ej                  d$         st        j	                  d]       y |	j                  d$   }Gn|||fv rɉ j                  F<Ej                  d$         st        j	                  d]       y |j                  d$   }G|>r|>j                  d$   nEj                  d$   Dj                  d$<   |/r|/j                  d$   n|9j                  d$   |8j                  d$<   |?|8j                  d^z   |8j                  d$<   ||k(  r|d_d  } fd`}H|?r|0r j                  j                  da      }I|Id^z   }Jt        j                  da|0j                  d$   g|Jg|Ib      }K|Kj                   j#                  t        j$                  dcg dd      g        j                  j                  da      }L|Ld^z   }Mt        j                  da|?j                  d$   g|Mg|Lb      }N|Nj                   j#                  t        j$                  dcg dd      g        H<      }O|Ot        j	                  de       y  j                  j                  dfdgh      }Pt        j                  dfKj                  d$   Oj                  d$   g|Pd^z   g|Pb      }Q j                  j                  dfdih      }Rt        j                  dfNj                  d$   |Oj                  d$   g|Rd^z   g|Rb      }S|Q}8|S}D j&                  j)                  |O        j&                  j)                  |K        j&                  j)                  |N        j&                  j)                  |Q        j&                  j)                  |S        j*                   j,                  |Oj                  <    j*                   j,                  |Kj                  <    j*                   j,                  |Nj                  <    j*                   j,                  |Qj                  <    j*                   j,                  |Sj                  <    j/                  Ej                  d$   GD|8||!|"|,||-|      }T|Tt        j	                  dj       y  j&                  j)                  T        j*                   j,                  |Tj                  <    j0                  j#                  |d_d         ||k7  r& j0                  j#                  ||d d' n|d dR        n"|d$   d'   g}U|D ]  }V j3                  |VU         j0                  j#                  |       |.|1k(  r  j0                  j#                  |.d dR        nN|.|2k(  r[ j0                  j)                  |.d$           j0                  j)                  |.d%           j0                  j)                  |.dk          n|.|3k(  ry j0                  j)                  |.d$           j0                  j)                  |.d_           j0                  j)                  |.dk           j0                  j)                  |.dl          np|.|5k(  r= j0                  j)                  |.d$           j0                  j)                  |.d_          n.|.|4k(  r)|.d$   d'   |.d$   d)   g}U|.D ]  }V j3                  |VU        |=@k(  r& j0                  j#                  |=d dR        dm _        y |=Ak(  r< j0                  j)                  |=d_           j0                  j)                  |=d%          dm _        y )nN>   r   r   r   )MatMulReshape	Transposer   r   rG   r   r   r   r   )r   r   r   r   rQ   )	AllReducer   r   r   r   z0fuse_rotary_attention: failed to match qkv nodes)r/   r/   r/   )r   r   rF   r   r   r   )rG   r   r   rG   r   r   )rF   r   r   r   )rG   rG   r   r   )r   r   r   rM   )r   ExpandrI   rF   r   r   r   )rG   r   r   r   rG   r   r   )r   r   WhereEqualr   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   r   r   r   r   r   r   r   rG   r   r   )r   r   r   r   rO   ConstantOfShaperK   r   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   r   rG   r   r   r   r   rG   r   r   r   rG   r   r   )r   r   r   r   rK   r   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   rG   r   r   r      r   r   r   rG   r   r   )r   r   r   r   rF   rI   rJ   rK   rF   r   r   r   )rG   r   rG   rN   r      r   r   r   rG   r   r   )	r   rF   rI   rJ   rK   rF   r   r   r   )	rG   rG   r   r   r   r   rG   r   r   )
r   rF   rI   rO   rJ   rK   rF   r   r   r   )
rG   rG   rG   r   r   r   r   rG   r   r   )	rG   rG   rN   r   r   r   rG   r   r   )	rG   rG   r   r   r   r   rG   r   r   )output_name_to_node)rF   r   r   r   r   )rG   rG   r   r   rG   rS   rI   r   rN   zDfuse_rotary_attention: failed to match past/present concat in v path	   z-fuse_rotary_attention: failed to match v path)Softmaxr   Divr   rP   NNz/fuse_rotary_attention: failed to match qk nodes)r/   r/   rR   rT   )r   r   SubrU   r   rI   rI   )rG   r   rN   rG   r   r   r   )r   r   rU   r   rI   rI   )rG   rN   rG   r   r   r   )r   r   r   r   rU   r   rI   rI   )rG   r   r   rN   rG   r   r   r   )r   r   r   rU   r   rI   rI   )	r   rU   r   rU   r   rU   r   rI   rI   )	rG   r   r   r   r   rG   r   r   r   z;fuse_rotary_attention: failed to match attention mask nodes)r   r   rF   r   RotaryEmbeddingr   )r   r   r   r   r   )r   rF   r   r   r   r   )rG   r   rG   r   r   r   )	r   r   r   rI   rF   r   r   r   r   )r   r   r   r   r   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   r   r   r   r   r   r   r   rG   r   r   r   )r   r   r   r   r   rO   r   rK   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   r   rG   r   r   r   r   rG   r   r   r   rG   r   r   r   )r   r   r   r   r   rK   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   rG   r   r   r   r   r   r   r   rG   r   r   r   )r   r   r   r   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   r   rG   rN   r   r   r   r   r   rG   r   r   r   )r   r   rF   rI   rJ   rK   rF   r   r   r   r   )rG   r   rG   r   r   r   r   rG   r   r   r   )r   r   rF   rI   rO   rJ   rK   rF   r   r   r   r   )rG   r   rG   rG   r   r   r   r   rG   r   r   r   )rG   r   rG   rN   r   r   r   rG   r   r   r   )rG   r   rG   r   r   r   r   rG   r   r   r   )	r   rF   rF   r   rS   r   r   r   r   )	rG   r   rG   r   r   r   r   r   rG   zDfuse_rotary_attention: failed to match past/present concat in k pathz.fuse_rotary_attention: failed to match k nodes)r   r   r   r   )r   r   r   r   )rF   r   rS   r   r   r   r   )r   r   r   r   r   r   rG   z.fuse_rotary_attention: failed to match q nodeszKfuse_rotary_attention: failed to find the same root_input for q, k, v pathsr/   z;fuse_rotary_attention: failed to verify runtime shape paths	_output_0rG   c                 .   
j                   j                  | dd      }|t        j                  d       y
j                   j	                  |j
                  d         }
j                   j	                  |j
                  d         }||t        j                  d       y|d   }|d   }||z  }
j                   j                  d	d
      }
j                   j                  |      &
j                  |t        j                  dg|gd       
j                   j                  dd      }t        j                  d|j
                  d   |j
                  d   |g|dz   g|      }	|	j                  j                  t        j                  dd      g       |	S )zDetect num_heads and hidden_size for ONNX model from phi-2
            Args:
                reshape_q (NodeProto): reshape node for q
            Returns:
                hidden_size_concat_node(NodeProto): Concat node to be used by reshape
            rF   rG   NzEfuse_rotary_attention: failed to trace the concat node from reshape_qrN   r   zMfuse_rotary_attention: failed to get constant nodes of num_heads or head_sizer   Initializerr   name_prefixF)r3   	data_typedimsvalsrawhidden_size_concatoutput_0r0   axis)r   match_parentr6   r7   get_constant_valuer    r8   get_initializeradd_initializerr	   INT64r
   r:   r<   r9   r=   )r   concatnum_head_constant_nodehead_size_constant_nodenum_head_valuehead_size_valuer   hidden_size_initilizerhidden_size_reshape_node_namehidden_size_concat_noder   s             r   create_hidden_size_concat_nodezBFusionRotaryAttention.fuse.<locals>.create_hidden_size_concat_node  s    ZZ,,Y!DF~de &*ZZ%B%B6<<PQ?%S"&*jj&C&CFLLQRO&T#%-1H1Plm3A6N5a8O(?:K%)ZZ%@%@\i%@%j"zz))*@AI$$/)//% %  -1JJ,G,G^r,G,s)&,&6&6LLOLLO*
 7CD2	'# $--44f6K6KFTU6V5WX**r   r   r0   perm)r   rN   rG   r   z?fuse_rotary_attention: failed to create hidden_size_concat_noder   concat_k_halfr   concat_q_halfzSfuse_rotary_attention: failed to create multi-head attention with rotary embeddingsr   r   T)op_typer   rW   r6   r7   match_parent_paths_allr    r!   lenreshape_add_qkr   r   r3   r8   r
   r:   r<   r9   r=   nodes_to_addappendthis_graph_namenode_name_to_graph_namerD   nodes_to_remove&add_nodes_to_remove_with_nodes_to_keepprune_graph)Wr   normalize_nodeinput_name_to_nodesr   	qkv_nodesqkv_nodes_1qkv_nodes_2qkv_nodes_3rg   rX   rY   
matmul_qkvr   r(   r*   past_seq_lenv_nodesadd_v	v_nodes_1	v_nodes_2	v_nodes_3	v_nodes_4	v_nodes_5r\   r   r]   matmul_vr   transpose_vr   qk_nodesr&   	matmul_qkr%   
add_qk_strattn_mask_nodes_1attn_mask_nodes_2attn_mask_nodes_3attn_mask_nodes_4attn_mask_nodes_5attn_mask_nodes_6attn_mask_nodes_7slice_mask_1slice_mask_2r'   r)   k_nodesslice_kr   	k_nodes_1	k_nodes_2	k_nodes_3	k_nodes_4	k_nodes_5r[   r   rotary_kmatmul_kr   shared_past_seq_lenr   q_nodesslice_qr   	q_nodes_1	q_nodes_2	q_nodes_3rZ   rotary_qmatmul_qr   root_outputr   k_transpose_node_namek_tranpose_output_namek_transpose_nodeq_transpose_node_nameq_tranpose_output_nameq_transpose_noder   concat_k_reshape_node_nameconcat_k_reshape_nodeconcat_q_reshape_node_nameconcat_q_reshape_nodenew_nodenodes_to_keep	temp_pathsW   `                                                                                      r   fusezFusionRotaryAttention.fuseF  s   !!)nn
 	jj22C

 jj228

 jj22E

 "=H:A}a
#I$,7)A{Az#I$/:,Aq+q*#ILLKL +5'	<JJ00P
	
 JJ008
	
 JJ00.
	
 **;; c)
 <!&$ E',  ?#( 9$ v/
 3  v/
 v/OkX !%] < o
9a` JJ00?
	
  AJ>KHahG JJ88+&AM
 $cd"1%++A.F(,2215L *I"9B6Hk9hG^^A&F *I"/8,KHG#**1-I"s9~':9B1bc9J6Hk9hG^^A&F *I"@I=Hk9eXHG^^A&F *ILLHI::///

 '	&.#Avq)LLJK !'	: JJ88(

 !JJ880

 !JJ88O!

 !JJ88H

 !JJ88Y$

 !JJ88R!

 !JJ88a'

 (,=)A|\$++A.I*/@,Aq,$++A.I*,,->q-A-H-H-KLJ*,,->q-A-H-H-KLJ**1-44Q7J**1-44Q7J*,,->q-A-H-H-KLJLLVW
 #	JJ00X
	
 JJ00N
	
 JJ00X
	
 **;;
 0" B%*( K+0$ E',  ?#( 6" 9$ 6" 6gcH !%M < g
9aP JJ00r'
	
  >G;KHa8G JJ88+&AM
 $cd"1%++A.F"/"3"9"9!"< *I#6666"2;/AxIxG *I"<E9Ax1iG^^A&F *I"s9~':"+A,rs"3Ix!*1b!4HhG^^A&F *I"W`TAx'1iHG^^A&F *ILLIJ
 JJ00A
	
 JJ00A
	
 JJ00[!
	
  1:.KHhG"/8,HaHG"JSGM8WaAxGLLIJ>>!q 11hnnQ6G8>>Z[K\6\LLfg#>>q!	 Z['..q1K;44;;q! Z[%,,Q/K
 6=q 1(//RSBTHNN15<q 1(//RSBTHNN1 $%-]][%@"K'%abM	2	+j ]$(JJ$?$?$L!%:[%H"%//%,,Q/0/0*	  &&--v/D/DV\/Z.[\ %)JJ$?$?$L!%:[%H"%//%,,Q/0/0*	  &&--v/D/DV\/Z.[\&DY&O#&.^_ *.)D)DY\k)D)l&$*$4$4(//24K4R4RST4UV3kAB/	%! *.)D)DY\k)D)l&$*$4$4(//24K4R4RST4UV3kAB/	%! -H,H$$%<=$$%56$$%56$$%:;$$%:;IMI]I]D(()@)E)EFBFBVBVD(()9)>)>?BFBVBVD(()9)>)>?GKG[G[D(()>)C)CDGKG[G[D(()>)C)CD''NN1
 LLno  *6:6J6J$$X]]3##IabM2i  ''7SVTV<X$QZ^,M$	;;I}U % 	##H-i  ''5	!  ''
3  ''
3  ''
3	!  ''
3  ''
3  ''
3  ''
3	!  ''
3  ''
3	!$QZ^WQZ^<M$	;;I}U % i  ''5   	!  ''
3  ''
3r   )r/   r/   r/   r/   r/   r/   N)__name__
__module____qualname____doc__r   intr   strr   r   r>   r   rD   r   r   r  __classcell__r   s   @r   r   r      s    

 
 	
6 !%66 6 	6
 6 6 6 6 6 6 6 6 6 
y$	6pZxGRK r   r   c            
       b     e Zd Zdef fdZdedefdZdefdZde	d	e	d
e	de	de	f
dZ
d Z xZS )FusionRotaryEmbeddingsr   c                 ~    d| _         t        | 	  || j                   | j                   | j                   dz   dg       y )Nr   z.1r   )	base_namer   r   )r   r   r   s     r   r   zFusionRotaryEmbeddings.__init__U  s5    *RVAVX]0^_r   rot_emb_nodefunctionc                 >   g g }}|j                   D ]  }|j                  dk(  s|j                  g k(  s#|j                  d   |j                  v s?|j	                  |       t        |j                        j                  |j                  d         }|j	                  |j                  |           g }|D ]q  }|j                  d   j                  }	| j                  j                  d      |	_        | j                  j                  |	       |j	                  |	j                         s t        ||      D ]c  \  }
t        t        fd| j                  j                  j                  j                               }|D ]  }t!        j"                  ||
        e |S )NConstantr   c                      | j                   v S N)r    )entryextra_outputs    r   <lambda>z?FusionRotaryEmbeddings.reassign_extra_outputs.<locals>.<lambda>o  s    8Sr   )noder   r    r!   r   listindexr<   tr   r8   r3   r   zipfiltergraphr   replace_node_input)r   r'  r(  extra_constantsextra_outputsfn_nodeoutput_indexextra_initializersextra_constantconstant_tensorprotoextra_initializernodes_to_updatenode_to_updater.  s                @r   reassign_extra_outputsz-FusionRotaryEmbeddings.reassign_extra_outputs\  sf   )+R}}G*,"1DXYIZ^f^m^mIm&&w/#HOO4::7>>!;LM$$\%8%8%FG	 %  -N#1#;#;A#>#@#@ (,

(C(CJ(O %JJ&&';<%%&:&?&?@	 . 03=BT/U+L+"6*SUYU_U_UeUeUkUkUpUp#qrO"1,,^\K\] #2 0V
 r   r0  c                    | j                   j                  | j                        }| j                   j                  ddgddg      }||\  }}nt        j                  d       y |j                  d   j                  d   g}t        t        fd| j                   j                   j                  j                              }t        t        fd| j                   j                   j                  j                              }d\  }	}
t        |      dk(  rt        |      dk(  r| j                   j                  |	      | j                   j                  |
      t        j                  |d   j                   d   j"                        j%                         }t        j                  |d   j                   d   j"                        j%                         }t'        j(                  |	t*        j,                  t        |j.                        |j1                         j3                         	      }| j                   j5                  || j6                         t'        j(                  |
t*        j,                  t        |j.                        |j1                         j3                         	      }| j                   j5                  || j6                         | j8                  j;                  |d   |d   g       |j;                  |	|
g       j                  }t        |      dkD  rt        t        fd
| j                   j                   j<                              }t        |      dk(  sJ | j?                  |d         t        t        fd|            }t        |      dk(  sJ t'        j@                  | j                  |||d      }d|_!        | j8                  jE                  |       |S )Nr   r   r   z.fuse_rotary_embeddings: failed to match MatMulrG   c                 B    | j                   d   j                  d   k(  S )Nr   rN   r!   r    constantr0  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>      hooa6HDJJWXM6Yr   c                 B    | j                   d   j                  d   k(  S )Nr   r   rE  rF  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  rH  r   	cos_cache	sin_cacher3   r   r   r   c                 6    | j                   j                  k(  S r,  )r3   r   )fnr0  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  s    "''T\\*Ar   c                     | vS r,   )output_namer9  s    r   r/  zOFusionRotaryEmbeddings.create_rotary_embeddings_from_function.<locals>.<lambda>  s    TaAar   r1   r2   r3   interleavedr4   )#r   r8   r&  rW   r6   r7   r!   r    r1  r5  r6  r0  r   r   r   to_arrayr<   r3  squeezer
   make_tensorr	   FLOATshapeflattentolistr   r   r   r9   	functionsrB  r:   r;   r   )r   r0  rotary_emb_node_namematmul_pathreshape_nodematmul_noderotary_emb_inputscos_cache_nodesin_cache_nodecos_cache_namesin_cache_namerK  rL  cos_cache_tensorsin_cache_tensorrotary_emb_outputsfuncrotary_emb_noder9  s    `                @r   &create_rotary_embeddings_from_functionz=FusionRotaryEmbeddings.create_rotary_embeddings_from_functionu  sH   #zz::4>>Jjj22!F

 "(3%L+LLIJ q!JJqM
 f%Y[_[e[e[k[k[q[q[v[vwxf%Y[_[e[e[k[k[q[q[v[vwx)A& 1$N#q(

**>:B

**>:B$--nQ.?.I.I!.L.N.NOWWYI$--nQ.?.I.I!.L.N.NOWWYI%11#%++)//*&&(//1	  JJ&&'79M9MN%11#%++)//*&&(//1	  JJ&&'79M9MN  ''):N1<M(NO  ..!AB![[!"Q&A4::CSCSC]C]^_Dt9>!> 77d1gFM!%f-acu&v!w)*a/// **NN$&%
 "1##L1r   r^   position_ids	cos_slice	sin_slicer!   c                    | j                   j                  | j                        }t        t	        fd| j                   j                   j
                  j                              }t        t	        fd| j                   j                   j
                  j                              }d\  }	}
t        |      dk(  rt        |      dk(  r| j                   j                  |	      | j                   j                  |
      t        j                  |d   j                  d   j                        j                         }t        j                  |d   j                  d   j                        j                         }|j                  d   }|d d d |dz  f   }|d d d |dz  f   }t        j                   |	t"        j$                  t        |j                        |j'                         j)                               }| j                   j+                  || j,                         t        j                   |
t"        j$                  t        |j                        |j'                         j)                               }| j                   j+                  || j,                         | j.                  j1                  |d   |d   g       t        j2                  | j                  |||	|
g|g|d      }d	|_        |S )
Nc                 (    | j                   d   k(  S Nr   r!   )rG  rm  s    r   r/  zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>      hooa6HI6Ur   c                 (    | j                   d   k(  S rq  rr  )rG  rn  s    r   r/  zLFusionRotaryEmbeddings.create_rotary_embeddings_from_nodes.<locals>.<lambda>  rs  r   rJ  rG   r   rN   rM  rS  r4   )r   r8   r&  r1  r5  r6  r0  r   r   r   rU  r<   r3  rV  rY  r
   rW  r	   rX  rZ  r[  r   r   r   r9   r:   r;   )r   r^   rl  rm  rn  r!   r]  rb  rc  rd  re  rK  rL  	head_sizerf  rg  rj  s      ``            r   #create_rotary_embeddings_from_nodesz:FusionRotaryEmbeddings.create_rotary_embeddings_from_nodes  s}     $zz::4>>J f%UW[WaWaWgWgWmWmWrWrstf%UW[WaWaWgWgWmWmWrWrst)A& 1$N#q(

**>:B

**>:B$--nQ.?.I.I!.L.N.NOWWYI$--nQ.?.I.I!.L.N.NOWWYI "*I!!%7	Q%7"78I!!%7	Q%7"78I%11#%++)//*&&(//1	  JJ&&'79M9MN%11#%++)//*&&(//1	  JJ&&'79M9MN  ''):N1<M(NO **NNnnMH%
 "1r   c                 &  % | j                   |j                  vr|j                  dk7  ry d %|j                  dk7  rt        |j                        dvs|j                  d   dvrt        j                  d       y | j                  |      %%t        j                  d       y | j                  j                  |       t        t        %fd| j                  j                  j                  j                              }t        |      dk(  sJ | j                  j                  j                  j                  j                  |d          n| j                  j                  |g d	g d
      }| j                  j                  |g dg d
      }|xs |}| j                  j                  |g dg d      }| j                  j                  |g dg d      }	|xs |	}
||
t        j                  d       y | j                  j                  |g dg d      }| j                  j                  |g dg d      }|xs |}| j                  j                  |g dg d      }| j                  j                  |g dg d      }|xs |}||t        j                  d       y |d   j                   |d   j                   k7  s]|d   j                   |
d   j                   k7  s>|d   j                   |d   j                   k7  s|d   j                   |
d   j                   k7  rt        j                  d       y | j                  j                  |ddgddg      }| j                  j                  |ddgddg      }|xs |}|t        j                  d       y d\  }}}| j                  j                  |g dg d      }| j                  j                  |g d g d!      }| j                  j                  |g d"g d#      }| j                  j                  |g d$g d%      }||}|d&   j                  d   }n||}|d'   j                  d   }nh|'|}|d&   j                  d   }|d(   j                  d   }n?|'|}|d'   j                  d   }|d(   j                  d   }nt        j                  d)       y d*\  }}| j                  j                  |g dg d+      }| j                  j                  |g d g d,      }| j                  j                  |g d"g d-      }| j                  j                  |g d$g d.      } ||}|d&   j                  d   }n||}|d'   j                  d   }nh|'|}|d&   j                  d   }|d(   j                  d   }n?| '| }|d'   j                  d   }|d(   j                  d   }nt        j                  d)       y |d/k(  r| j                  j                  |d(   d0gdg      }!| j                  j                  |d(   d0gdg      }"|!!|"|!d   j                   |"d   j                   k7  rt        j                  d1       y |"d   j                  d   }ng }!g }"d2\  }#}$||k(  r||k(  s
||k(  rZ||k(  rU|d3   j                   |d3   j                   k7  s |d   j                   |d   j                   k7  rt        j                  d4       y ||k(  r||k(  s
||k(  r|| k(  r|d   j                   |d   j                   k7  rt        j                  d5       y | j                  j                  |d   d6d7gddg      }#| j                  j                  |d   g d8g d9      }$|#?|$=| j                  j#                  |#d   j                  d         |$d   j                  dk7  r+t        j                  d:       y t        j                  d;       | j%                  |d   j&                  d   ||||j&                  d         %%t        j                  d       y | j)                  |g       | j)                  |d d        | j)                  |d d        | j)                  |d d        | j)                  |
d d        | j)                  |d d        | j)                  |       | j)                  |       | j)                  |!d d        | j)                  |"d d        |#;t        | j                  j+                  |#d               dk(  r| j)                  |#       |$| j)                  |$d d        | j-                  | j                          | j.                  | j0                  %j                   <   | j2                  j                  %       d<| _        y )=Nr   >   r      rG   >   pospos_idpos_idsposition_idrl  zLfuse_rotary_embeddings: failed to verify inputs for RotaryEmbedding functionz=fuse_rotary_embeddings: failed to create RotaryEmbedding nodec                 <    | j                   j                  d   k(  S rq  )r3   r!   )r0  rj  s    r   r/  z-FusionRotaryEmbeddings.fuse.<locals>.<lambda>  s    DII1G1G1J$Jr   r   )rO   rF   NegrS   r   r   )rO   rF   r~  rS   rS   )	rO   rF   r~  rS   rI   r   rJ   rK   r   )	rG   r   r   r   rG   r   r   r   r   )	rO   rF   r~  rS   rI   r   rJ   rK   rS   z9fuse_rotary_embeddings: failed to match x2 in rotate_half)rO   rF   rS   r   )rG   r   rG   r   )rO   rF   rS   rS   )rO   rF   rS   rI   r   rJ   rK   r   )rG   r   rG   rN   r   r   r   r   )rO   rF   rS   rI   r   rJ   rK   rS   z9fuse_rotary_embeddings: failed to match x1 in rotate_halfr   zCfuse_rotary_embeddings: failed to match common input in rotate_halfrO   r   rS   z8fuse_rotary_embeddings: failed to match x in rotate_half)Nr/   r/   )	rO   rI   rJ   Squeezer  rS   rI   rJ   rK   )	rG   rG   r   r   r   r   rN   r   r   )rO   rI   rJ   r  r  rS   rI   r   )rG   rG   r   r   r   r   rN   r   )rO   rI   rJ   rS   rI   rJ   rK   )rG   rG   r   r   rN   r   r   )rO   rI   rJ   rS   rI   r   )rG   rG   r   r   rN   r   r   r   rN   z>fuse_rotary_embeddings: failed to match sin path in apply_rope)Nr/   )	r   rG   r   r   r   r   rN   r   r   )r   rG   r   r   r   r   rN   r   )r   rG   r   r   rN   r   r   )r   rG   r   r   rN   r   r/   r   zGfuse_rotary_embeddings: failed to match position ids path in apply_roper   r   zdfuse_rotary_embeddings: failed to match common Gather node and Shape node in sin cache and cos cachezRfuse_rotary_embeddings: failed to match common Add node in sin cache and cos cacherJ   rK   )rJ   rK   r   rL   zKfuse_rotary_embeddings: failed to match past_seq_len and curr_seq_len pathsz:fuse_rotary_embeddings: failed to match common cache pathsT)r&  r   r   r    r6   r7   rk  r   r   r1  r5  r   r6  
value_inforemoverW   r3   find_graph_inputrv  r!   add_nodes_to_removeget_childrenr?   r   r   r   r   )&r   r0  r   r   old_shape_inferrotate_half_x2_path_1_1rotate_half_x2_path_1_2rotate_half_x2_path_1rotate_half_x2_path_2_1rotate_half_x2_path_2_2rotate_half_x2_path_2rotate_half_x1_path_1_1rotate_half_x1_path_1_2rotate_half_x1_path_1rotate_half_x1_path_2_1rotate_half_x1_path_2_2rotate_half_x1_path_2x_path_1x_path_2x_pathsin_pathrL  rl  
sin_path_1
sin_path_2
sin_path_3
sin_path_4cos_pathrK  
cos_path_1
cos_path_2
cos_path_3
cos_path_4position_ids_from_sin_pathposition_ids_from_cos_pathpast_seq_len_pathcurr_seq_len_pathrj  s&                                        @r   r  zFusionRotaryEmbeddings.fuse  s	   >>-$,,%2G <<5 4::f,

1 F 1 kl"II$OO&\]   ''- #JDJJL\L\LbLbLmLmnO '1,,,JJ""--44_Q5GH( '+jj&B&B>'# '+jj&B&B:'# %<$V?V!&*jj&B&Be+'# '+jj&B&Ba+'# %<$V?V!$,0E0MXY&*jj&B&B7'# '+jj&B&B3'# %<$V?V!&*jj&B&B^('# '+jj&B&BZ('# %<$V?V!$,0E0MXY &b)..2G2K2P2PP(,115J25N5S5SS(,115J25N5S5SS(,115J25N5S5SSbc zz33$AH zz33 AH )F~WX 1=-Hi55m+J
 55a(J
 55W%J
 55K"J
 %%$RL..q1	'%$RL..q1	'%$RL..q1	'{003'%$RL..q1	'{003]^ #+Hi55m+J
 55a(J
 55W%J
 55K"J
 %%$RL..q1	'%$RL..q1	'%$RL..q1	'{003'%$RL..q1	'{003]^ r!-1ZZ-I-IQKKC.*
 .2ZZ-I-IQKKC.* /6191!499=WXY=Z=_=__LL!jk9!<BB1E-/*-/*3=00J&8z+AJ&8z+AB<$$(9(99Xb\=N=NRZ[]R^RcRc=cLL~ j(X-CJ&8z+AB<$$(9(99LL!uv$(JJ$@$@RLw'F%! %)JJ$@$@RL4%! &-(0zz223DR3H3N3Nq3QRZ(,44CLL!noYZ"FF%b)003AO &\] $$dV,$$%:3B%?@$$%:3B%?@$$%:3B%?@$$%:3B%?@$$VCR[1$$X.$$X.$$%?%DE$$%?%DE ,TZZ5L5LM^_`Ma5b1cgh1h (():; ,(():3B)?@dnn-=A=Q=Q$$_%9%9:  1r   )r  r  r  r   r   r   r   rB  rk  r   rv  r  r!  r"  s   @r   r$  r$  T  so    `i `9  2H9 HT66 6 	6
 6 6pA r   r$  )loggingtypingr   r   fusion_attentionr   fusion_baser   onnxr   r   r	   r
   r   
onnx_modelr   	getLoggerr  r6   r   r$  rQ  r   r   <module>r     sK   
  " ,  L L  			8	$A O A H"d V d r   