
    gU                         d dl mZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZ  ee      Z G d de      Z G d	 d
e      Zy)    )	getLoggerN)Fusion)FusionUtils)helper)	OnnxModelc                   D     e Zd ZdZdedef fdZd Zd Zd Z	d Z
 xZS )	FusionGptAttentionPastBasez3Base class for GPT Attention Fusion with past statemodel	num_headsc                 x    t         |   |dddgd       || _        t        |      | _        i | _        d | _        y )N	AttentionLayerNormalizationSkipLayerNormalizationz	with past)super__init__r   r   utilscasted_attention_maskmask_filter_valueselfr
   r   	__class__s      b/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_gpt_attention.pyr   z#FusionGptAttentionPastBase.__init__   sA    .BD\-]_jk" '
%'"!%    c                    | j                   j                  |d|      }||j                  dk7  rt        j	                  d       y | j                   j                  |d      dk7  rt        j	                  d       y |j                  d   }| j                   j                  |d|      }|r|j                  dk(  r|}n>| j                   j                  |ddgddg      }|t        j	                  d       y |d   }| j                   j                  |d      dk7  rt        j	                  d	       y |j                  d   }	||	k7  rt        j	                  d
       y |S )Nr   Gatherz,match_past_pattern_1: expect Gather for past   z9match_past_pattern_1: expect indices=1 for Gather of past	Transposez7match_past_pattern_1: failed match Transpose and Gatherz;match_past_pattern_1: expect indices=0 for Gather k of pastz,match_past_pattern_1: expect past to be same)r
   
get_parentop_typeloggerdebugfind_constant_inputinputmatch_parent_path)
r   concat_kconcat_voutput_name_to_nodegatherpastparentgather_past_kpast_k_nodespast_ks
             r   match_past_pattern_1z/FusionGptAttentionPastBase.match_past_pattern_1   s9   & &&x4GH>V^^x7LLGH::))&!49LLTU||A&&x4GHfnn0"M::77;PXBY\]_`[abL#VW(,M::))-;q@LLVW$$Q'6>LLGHr   c                 (   | j                   j                  |d|      }||j                  dk7  rt        j	                  d       y | j                   j                  |d|      }||j                  dk7  rt        j	                  d       y | j                   j                         }|dk  r]t        j                  |ddg      st        j	                  d       y t        j                  |d	d
d
g      st        j	                  d       y | j                  j                  |d
dg      st        j	                  d       y | j                  j                  |d
d
d
g      st        j	                  d       y t        j                  |ddd      st        j	                  d       y |j                  d   }| j                   j                  |ddgddg      }|t        j	                  d       y |d   j                  d   }	||	k7  rt        j                  d       y |S )Nr   Squeezez:match_past_pattern_2: expect Squeeze as parent of concat_vSplitz0match_past_pattern_2: expect Split for past path   axesz:match_past_pattern_2: axes != [0] for Squeeze in past pathsplitr   z<match_past_pattern_2: split != [1, 1] for Split in past pathaxis)default_valuezKmatch_past_pattern_2: attribute axis of Split are not expected in past pathz7match_past_pattern_2: failed to match past_k_nodes pathr   z,match_past_pattern_2: expect past to be same)r
   r   r    r!   r"   get_opset_versionr   check_node_attributer   check_node_input_valuer$   r%   info)
r   r&   r'   r(   squeezer5   opset_versionr*   r-   r.   s
             r   match_past_pattern_2z/FusionGptAttentionPastBase.match_past_pattern_2K   s   , **''!5HI?goo:LLUV

%%gq2EF=EMMW4LLKL

446233GVaSIYZ33E7QFK[\::44Wa!EYZ::44UA1vF[\//vqPQRLLfg{{1~zz33Hy'>RUVXYTZ[LLRSb!''*6>KKFGr   c                     | j                   j                  |d|d      }|st        j                  d       y | j                   j                  |d|d      }|st        j                  d       y |j                  d   }|S )N	UnsqueezeF)	recursivezexpect unsqueeze for presentConcatzexpect concat for presentr   )r
   find_first_child_by_typer!   r;   output)r   r'   input_name_to_nodesunsqueeze_present_vconcat_presentpresents         r   match_presentz(FusionGptAttentionPastBase.match_present   s    "jjAAk#6% B 
 #KK67<<+>% = 
 KK34 ''*r   c                 2   || j                   v r| j                   |   }|S | j                  j                  |      r/| j                  j	                  |      \  }}|| j                   |<   |S | j                  j                  |      \  }}|| j                   |<   |S N)r   r
   find_graph_inputr   cast_graph_input_to_int32cast_input_to_int32)r   
input_nameattention_mask_input_namecasted	cast_nodes        r   cast_attention_maskz.FusionGptAttentionPastBase.cast_attention_mask   s    333(,(B(B:(N% )( ZZ((404

0T0TU_0`-F-5ND&&z2 )( 48::3Q3QR\3]0%y5ND&&z2((r   )__name__
__module____qualname____doc__r   intr   r/   r>   rI   rS   __classcell__r   s   @r   r	   r	      s0    =&i &C &/bAF"	)r   r	   c                   8     e Zd ZdZdedef fdZd Zd Z xZ	S )FusionGptAttentionzP
    Fuse GPT-2 Attention with past state subgraph into one Attention node.
    r
   r   c                 &    t         |   ||       y rK   )r   r   r   s      r   r   zFusionGptAttention.__init__   s    	*r   c
                    | j                   j                  d      }
t        j                  d|||||g|
dz   |g|
      }d|_        |j
                  j                  t        j                  d| j                        t        j                  d|	rdnd	      g       | j                  C|j
                  j                  t        j                  d
t        | j                              g       t        j                  d|
dz   |j                  d   g|
dz   g|
dz         }t        j                  d|
dz   |j                  d   g|g|
dz         }| j                  j                  |||g       | j                  | j                  |j                  <   | j                  | j                  |j                  <   | j                  | j                  |j                  <   y )NGptAttentionr   _output)inputsoutputsnamezcom.microsoftr   unidirectionalr   r   r   MatMul_matmul_output_matmulAdd   _add)r
   create_node_namer   	make_nodedomain	attributeextendmake_attributer   r   floatr$   nodes_to_addthis_graph_namenode_name_to_graph_namerc   )r   	fc_weightfc_biasgemm_qkvr*   rH   r$   rD   maskis_unidirectionalattention_node_nameattention_nodematmul_nodeadd_nodes                 r   create_attention_nodez(FusionGptAttention.create_attention_node   s    #jj99.I))9gtT:(94g>$	
 !0  ''%%k4>>B%%&6=NTUV	
 !!-$$++V-B-BCVX]^b^t^tXu-v,wx&&')3X^^A5FG(+;;<$y0	
 ##'*::HNN1<MNH$v-	
 	  .+x!HI<@<P<P$$^%8%899=9M9M$$[%5%566:6J6J$$X]]3r   c                    d }d }g }|j                   dk(  }d }|s%| j                  j                  |g dg d||      }n$| j                  j                  |g dg d||      }|y d }	|s |\  }
}}}}}}|
j                  d|d   z
     }	n	|\  }}}}}}| j                  j                  |g d	g d
      }|t        j                  d       y |\  }}}}| j                  j                  |g dg d|      }|"| j                  j                  |g dg d|      }|| j                  j                  |g dg d|      }|"| j                  j                  |g dg d|      }|t        j                  d       y |d   j                  d   }| j                  j                  |d         \  }}|d   j                  |   }n$|d   j                  d   }|d   j                  d   }|d   }|	$|	|j                  vrt        j                  d       y d}d }d }d }| j                  j                  |g dg d      } | | \  }!}"}#}$}%| j                  j                  |"g dg d      }&|&t        j                  d       y |&d   }'|&d   }|$|'k7  rt        j                  d       y t        |&      dkD  r|&d   j                   dk(  r| j                  j                  |&d         \  }}(|(dk7  r|( | _        n| j                  j                  |g d g d!fg d"g d#fg|      \  }} }| t        j                  d$       y | d%   })| d&   }$| d   }%|dk(  r| d   }*| j                  j                  |*g d'g d(fg d)g d*fg d+g d,fg|      \  }}}|t        j                  d-       y t        |      dkD  r?|d   j                   dk(  r-| j                  j                  |d         \  }}(|(dk7  r|(| _        | j                  j                  |)g d.g d/fg d0g d1fg|      \  }}&}|&t        j                  d2       y |&|dk(  rdnd   }| j                  j                  |&d   d|      }+|+j                   d3k(  r|+}'|$|'k7  r=t        j                  d       y |+j                   d4k(  r|+}nt        j                  d2       | j                  j                  |j                  d         },t        |,t        j                        rIt        |,j                        d5k(  r1|,j                  d d d6k(  r|,j                  d   |,j                  d   k(  st        j                  d7       y t        j                   |,t        j"                  |,            rd8}nRt        j                   |,t        j$                  t        j"                  |,                  st        j                  d9       y | j                  j                  |%g d:g d;      }-|-t        j                  d<       y |-\  }.}/}0||0k7  rt        j                  d=       y | j                  j                  |%g d	g d
      }1|1B| j                  j                  |%g d>g d?      }1|1t        j                  d@       y |1\  }}2}3}4}5n|1\  }2}3}4}5||5k7  rt        j                  dA       y |r|2|k7  rt        j                  dB       y dC}6|#|d   j                  d   }7| j'                  |7      }6| j)                  |2||      xs | j+                  |2||      }|t        j-                  dD       y | j                  j/                  |      st        j                  dE       | j1                  ||      }|t        j-                  dF       y | j                  j3                  |      st        j-                  dG       y | j5                  ||||||j6                  d   |j6                  d   |6|	       d| _        y )HNr   )rh   ReshapeGemmr   r   r   re   )r   Nr   r   r   r   r   )r(   return_indice)r   r   r   r   r   re   )Nr   r   r   r   r   r   r   )rB   r   r   r2   )r   r   r   r   z&fuse_attention: failed to match v path)r   r   r   r   )r   r   r   r   )r   r   r   r   )rh   re   r   )r   Nr   )rh   re   r   z'fuse_attention: failed to match fc pathri   r   zCUpstream Add and (Skip)LayerNormalization shall have one same inputT)SoftmaxSubMulDivre   )r   r   r   r   r   )
r   r   Slicer   r@   r   r1   r   Shaper   )
r   r   r   r   r   r   r   r   r   r   z8fuse_attention: failed to match unidirectional mask path   z-fuse_attention: skip since div_qk != div_maskr   i)r   Wherer   re   )r   r   r   r   )r   rh   r   r   re   )r   r   Nr   r   z(fuse_attention: failed to match qk nodes)r   r   Castr@   r@   r   )Nr   r   r   r   r   )r   r   r@   r@   r   )Nr   r   r   r   )r   r   r@   r@   )Nr   r   r   z9fuse_attention: failed to match input attention mask path)r   r   r   r@   r   r1   r   r   )r   r   r   r   r   r   r   r   )r   r   r@   r   r1   r   r   )r   r   r   r   r   r   r   z)fuse_attention: failed to match mask pathr   rB      )r   r   z4fuse_attention: skip since mask shape is not 1x1xWxWFzDfuse_attention: skip since mask is neither lower triangular nor ones)r   r   r2   )r   r   r   z&fuse_attention: failed to match q pathz.fuse_attention: skip since split_fc != split_q)r   rB   r   r   r2   )r   r   r   r   r   z&fuse_attention: failed to match k pathz.fuse_attention: skip since split_fc != split_kz8fuse_attention: skip since concat_k != concat_k_to_match z)fuse_attention: failed to match past pathzpast is not graph input.z,fuse_attention: failed to match present pathz!expect present to be graph output)r    r
   r%   r$   r!   r"   get_constant_inputlenr   match_parent_pathsr   get_constant_value
isinstancenpndarrayshapeallclose	ones_liketrilrS   r/   r>   r;   rL   rI   find_graph_outputr~   rD   prune_graph)8r   normalize_noderE   r(   r*   rH   r   is_normalize_node_skiplayernorm	qkv_nodesanother_inputadd_qkvreshape_qkvrw   	reshape_1	reshape_2transpose_qkv
matmul_qkvv_nodesr'   transpose_v	reshape_vsplit_fcfc_nodesru   i_rv   layernorm_before_attentionry   
slice_maskinput_mask_nodesconcat_k_to_matchqk_nodes
softmax_qksub_qkmul_qkdiv_qk	matmul_qk
mask_nodesdiv_maskmul_valwhere_qkadd_qkdiv_or_concat	mask_dataq_nodestranspose_q	reshape_qsplit_qk_nodesr&   transpose_k	reshape_ksplit_krP   rO   s8                                                           r   fusezFusionGptAttention.fuse   sh   *8*@*@D\*\'	.

44W($7+ 5 I 

44P%$7+ 5 I .  $MM!mA.>*>?M  **..z;fhtu?LLAB7>4;	8 :://@	
 zz33H#	H zz337#	H ::77? '	 FG ))!,I::00!=DAqqk''*G ))!,Iqk''*G%-b\" $>X>^>^)^LL^_ 
 :://
<fhwx>F;Z55 /J  !WX!"~H#AJ!LM:"z!}'<'<'E!ZZ:::a=I
7f$.5XD* "ZZ:::LIACUV $NAx GH|Hb\F IAv!!)-)F)F X1
 P.
 E+ (!*&#Q$ $+LL!\]'(1,1A!1D1L1LPU1U!%!>!>?OPQ?R!SJAw&(18.#zz<< d0 \-
 $ Az1 !HI#aAQ7J JJ11*R.!EXYM$$-(X%LL!PQ&&(2$1!HI JJ11*2B2B12EF	y"**-IOO$)#v-"iooa&88LLOP;;y",,y"9: %YY0G(HILL_`**..y:[]fg?LLAB,3)iwLLIJ**..y:egst?jj22HG
 EFAH>Hk9g:A7X{IwwLLIJ->!>LLST$&!')"-33A6J(,(@(@(L% ((8=PQ 
UYUnUnh 3V
 <KKCDzz**40LL34 $$X/BC?KKFGzz++G4KK;<""&--a0q!%
	
  r   )
rT   rU   rV   rW   r   rX   r   r~   r   rY   rZ   s   @r   r\   r\      s(    +i +C +.K` r   r\   )loggingr   numpyr   fusion_baser   fusion_utilsr   onnxr   
onnx_modelr   rT   r!   r	   r\    r   r   <module>r      sA   
    $   	8	X) X)vw 3 w r   