
    g3                     h    d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	  ee
      Zd Z G d de      Zy)	    )	getLoggerN)FusionGptAttentionPastBase)helper)	OnnxModelc                 $    t        | |z
        dk  S )Ngư>)abs)valueexpected_values     k/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_gpt_attention_megatron.pyis_closer      s    u~%&$..    c                   >     e Zd ZdZdedef fdZd Zd Zd Z	 xZ
S )FusionGptAttentionMegatronz^
    Fuse GPT-2 Attention with past state subgraph from Megatron into one Attention node.
    model	num_headsc                 &    t         |   ||       y )N)super__init__)selfr   r   	__class__s      r   r   z#FusionGptAttentionMegatron.__init__   s    	*r   c                 R   | j                   j                  d      }| j                  |      }	|j                  d   }
|j                  d   |j                  d   k(  rdnd}t        j                  d||j                  d   |j                  |   |	|g|
|g|      }d|_        |j                  j                  t        j                  d| j                        t        j                  dd      g       | j                  C|j                  j                  t        j                  d	t        | j                              g       |g}| j                  j                  |       |D ]%  }| j                  | j                   |j"                  <   ' | j$                  j'                  |       d
| _        y )NGptAttentionr      	Attention)inputsoutputsnamezcom.microsoftr   unidirectionalmask_filter_valueT)r   create_node_namecast_attention_maskoutputinputr   	make_nodedomain	attributeextendmake_attributer   r   floatnodes_to_addthis_graph_namenode_name_to_graph_namer   nodes_to_removeappendprune_graph)r   matmul_before_splitadd_before_splitpastpresentr#   reshape_qkvmaskattention_node_name
int32_maskr"   iattention_noder*   nodes                  r   fuse_attention_nodez.FusionGptAttentionMegatron.fuse_attention_node   s    #jj99.I--d3
##A&"((+/B/I/I!/LLAST))#))!, &&q) W%$
 !0  ''%%k4>>B%%&6:	
 !!-$$++V-B-BCVX]^b^t^tXu-v,wx&'  . D6:6J6JD((3 ! 	##K0  r   c                 D   | j                   j                  |g dg d      }|t        j                  d       y |\  }}}}	t	        |      dkD  r@|d   j
                  dk(  r.| j                   j                  |d         \  }
}|dk7  r| | _        |j                  d   |j                  d   k7  rt        j                  d       y | j                  j                  |dd	      st        j                  d
       y | j                  j                  |dd      st        j                  d       y | j                   j                  |	j                  d         st        j                  d       y | j                  j                  |ddg      st        j                  d       y | j                  j                  |ddg      st        j                  d       y| j                  j                  |ddg      st        j                  d       y| j                  j                  |	ddg      st        j                  d       y | j                  j                  |	ddg      st        j                  d       y | j                   j                  |g dg d      }||d   |k7  rt        j                  d       y | j                   j                  |	g dg d      }||d   |k7  rt        j                  d       y | j                   j                  |	g dg d      }||d   |k7  rt        j                  d       y | j                   j                  |	g dg d       }|!| j                   j                  |	g d!g d       }||d   |k7  rt        j                  d"       y |	j                  d   S )#N)MulSubSlicer?   )r   r   r   r   z8fuse_attention: failed to match unidirectional mask pathr   r   r=   i'  zCfuse_attention failed: mul_qk.input[1] != last_slice_mask.output[0]g     @z?fuse_attention failed: mul_mask input 1 is not constant 10000.0g      ?z;fuse_attention failed: sub_mask input 0 is not constant 1.0z+expect slick_mask input 0 to be graph inputzKfuse_attention failed: last_slice_mask input 1 (starts) is not constant [0]   zIfuse_attention failed: last_slice_mask input 3 (axes) is not constant [3]F   zJfuse_attention failed: last_slice_mask input 4 (steps) is not constant [1]   zDfuse_attention failed: slice_mask input 3 (axes) is not constant [2]zEfuse_attention failed: slice_mask input 4 (steps) is not constant [1])	UnsqueezeGatherShapeMatMul)rB   r   r   r   z/fuse_attention: failed to match last slice pathz0fuse_attention: failed to match first slice path)rC   r>   rD   rE   rF   )r   r   r   r   r   z3fuse_attention: failed to match last slice sub path)rC   r>   rD   rE   LayerNormalization)r   r   r   r   r   )rC   r>   rD   rE   SkipLayerNormalizationz5fuse_attention: failed to match last slice sub path 1)r   match_parent_pathloggerdebuglenop_typeget_constant_inputr   r#   r"   utilscheck_node_input_valuefind_graph_inputinfo)r   sub_qkmul_qk	matmul_qklayernorm_before_attention
mask_nodesmul_masksub_masklast_slice_mask
slice_mask_mul_vallast_slice_pathfirst_slice_pathfirst_slice_subfirst_slice_sub_1s                   r   
match_maskz%FusionGptAttentionMegatron.match_maskJ   s5   ZZ11&:Z\hi
LLST<F98_jz?Q:a=#8#8E#A66z!}EJAw%*1&<<?o44Q77LL^_zz001gFLLZ[zz001cBLLVWzz**:+;+;A+>?KKEFzz00!aSILLfgzz00!aSILLdezz00!aSILLefzz00QDLL_`zz00QDLL`a**66G
 "ob&9Y&FLLJK::77BL
 #'7';y'HLLKL**66=

 "ob&9Y&FLLNO JJ88I
 $ $

 < <Q! $(9"(=A[([LLPQ""r   c           	      
   d }d }|j                   dk(  }d }|s$| j                  j                  |g dg d|      }n#| j                  j                  |g dg d|      }|y d }|s|\  }	}
}}}}|	j                  d   }n|\  }
}}}}|j                  d   }| j                  j                  |g dg d	      }|!| j                  j                  |g d
g d	      }|t        j                  d       y |\  }}}}}}}|j                   dk(  r(||j                  d   k7  rt        j                  d       y |j                   dk(  r(||j                  d   k7  rt        j                  d       y | j                  j                  |g dg d      }|t        j                  d       y |\  }}}}| j                  j                  |d      dk7  rt        j                  d       y | j                  ||||      }| j                  j                  |g dg d      }|t        j                  d       y |\  }}} }!||!k7  rt        j                  d       y | j                  j                  |g dg d      }"|"t        j                  d       y |"\  }#}$}%}&}'}(||(k7  rt        j                  d       y | j                  j                  |'      \  })}*t        |*t        j                        r9t        |*j                        dgk(  r |*d   dk(  r|*d   dk(  r|*d   dkD  r|*d   dkD  st        j                  d       y |*d   }+|+| j                  k7  r,t        j!                  d|+ d | j                          |+| _        |*d   },| j                  j                  |#      \  })}*t#        t        j$                  t        j$                  |,                  }-t'        |*|-      st        j                  d!|* d"|-        y | j                  j                  |      \  })}*t'        |*|-      st        j                  d#|* d"|-        y | j)                  |%||      }|t        j                  d$       y | j                  j+                  |      st        j                  d%       | j-                  ||      }|t        j                  d&       y | j                  j/                  |      st        j!                  d'       y | j1                  |||||j                  d   ||       y )(NrI   )Addre   rF   Reshape	TransposerF   )r   r   Nr   r   r   )output_name_to_node)re   rF   rf   rg   rF   )r   Nr   r   r   r   )Concatrg   rf   Splitre   rF   rH   )r   r   r   r   r   Nr   )ri   rg   rf   rj   re   rF   rI   z&fuse_attention: failed to match v pathrH   zAfuse_attention: skip_input != layernorm_before_attention.input[0]r@   )Softmaxr>   r=   rF   )r   r   r   r   z'fuse_attention: failed to match qk pathaxisz+fuse_attention failed: softmax_qk axis != 3)Divrg   rf   rj   z&fuse_attention: failed to match q pathz-fuse_attention: skip since split_v != split_q)rm   rg   ri   rg   rf   rj   )r   r   r   r   r   r   z&fuse_attention: failed to match k pathz-fuse_attention: skip since split_v != split_krA   r   rB   z:fuse_attention: reshape constant input is not [0, 0, N, H]zDetected num_heads=z. Ignore user specified value zfuse_attention: div_k value=z
 expected=zfuse_attention: div_q value=z!fuse_attention: match past failedz(fuse_attention: past is not graph input.z$fuse_attention: match present failedz1fuse_attention: expect present to be graph output)rN   r   rJ   r#   rK   rL   r"   get_node_attributerc   rO   
isinstancenpndarraylistshaper   rS   r)   sqrtr   match_past_pattern_2rR   match_presentfind_graph_outputr;   ).r   normalize_nodeinput_name_to_nodesrh   r2   r3   is_normalize_node_skiplayernorm	qkv_nodes
skip_inputadd_skipadd_after_attentionmatmul_after_attentionr4   transpose_qkv
matmul_qkvv_nodesconcat_vtranspose_v	reshape_vsplit_vr1   r0   rW   qk_nodes
softmax_qkrT   rU   rV   attention_maskq_nodesdiv_qtranspose_q	reshape_qsplit_qk_nodesdiv_kr]   concat_ktranspose_k	reshape_ksplit_kr8   r	   r   hidden_size_per_headr
   s.                                                 r   fusezFusionGptAttentionMegatron.fuse   s   *8*@*@D\*\'	.

44J%$7	 5 I 

44C"$7	 5 I 
. #& "*J #& (--a0J**.. %
 ?jj22 )G ?LLAB 	
& '..2FF8>>qAALL\] '..2JJ8??BBLL\]:://
<_amnLLBC2:/VVY::((V<ALLFGD^_**..y:bdpq?LLAB3:0YgLLHI**..K

 ?LLAB@G=8[)WgLLHI::00;5ubjj)U[[!aS(aAaAa1a1LLUV!H	&KK-i[8VW[WeWeVfgh&DN$Qx::0075rwwrww/C'DEF~.LL7wjHXYZ::0075~.LL7wjHXYZ ((8=PQ<LL<=zz**40LLCD $$X/BC?LL?@zz++G4KKKL  &--a0	
r   )__name__
__module____qualname____doc__r   intr   r;   rc   r   __classcell__)r   s   @r   r   r      s.    +i +C +- ^X#t
r   r   )loggingr   numpyrp   fusion_gpt_attentionr   onnxr   
onnx_modelr   r   rK   r   r    r   r   <module>r      s4   
   ;   	8	/P
!; P
r   