
    gL                         d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZ  ee      Z G d d	e      Zy)
    )	getLogger)TupleUnionN)Fusion)NumpyHelper)	NodeProtoTensorProtohelper)	OnnxModelc                       e Zd ZdZdedededededef fdZd$d	ed
edefdZ	d Z
	 d$d	eded
edeeef   fdZdedededededededeedf   fdZdedededededededeedf   fdZd Zd Zd Zd Zd Zd efd!Zd" Zd# Z xZS )%FusionAttentionUnetzB
    Fuse Attention subgraph of UNet into one Attention node.
    modelhidden_size	num_headsis_cross_attentionenable_packed_qkvenable_packed_kvc                     t         |   ||r|rdnddg       || _        || _        || _        || _        || _        d| _        d| _        y )N	AttentionMultiHeadAttentionLayerNormalizationT)	super__init__r   r   r   r   r   num_heads_warninghidden_size_warning)selfr   r   r   r   r   r   	__class__s          c/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_attention_unet.pyr   zFusionAttentionUnet.__init__   sc     	-2CKI]!"	

 '""4 "3 0 "&#'     	reshape_q	is_torch2returnc                 d   d}|r| j                   j                  |d      }|r|j                  dk(  rt        |j                        dk(  r| j                   j                  |j                  d         }t        |t        j                        rt        |j                        dgk(  rut        |      }ni| j                   j                  |j                  d         }t        |t        j                        r't        |j                        dgk(  rt        |d         }t        |t              r|dkD  r|S y)zDetect num_heads from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
            is_torch2 (bool): graph pattern is from PyTorch 2.*
        Returns:
            int: num_heads, or 0 if not found
        r      Concat      )r   
get_parentop_typeleninputget_constant_value
isinstancenpndarraylistshapeint)r   r    r!   r   reshape_parentq_shape_values         r   get_num_headsz!FusionAttentionUnet.get_num_heads3   s     	!ZZ229a@N."8"8H"D^MaMaIbfgIg JJ99.:N:Nq:QR	i4ioo9NSTRU9U #II !JJ99)//!:LMM-4m>Q>Q9RWXVY9Ya 01	i%)a-r   c                     | j                   j                  |j                  d         }|r"t        j                  |      j
                  d   S y)zDetect hidden_size from LayerNormalization node.
        Args:
            layernorm_node (NodeProto): LayerNormalization node before Q, K and V
        Returns:
            int: hidden_size, or 0 if not found
        r'   r   )r   get_initializerr+   r   to_arrayr1   )r   layernorm_nodelayernorm_biass      r   get_hidden_sizez#FusionAttentionUnet.get_hidden_sizeO   sE     33N4H4H4KL''7==a@@r   r9   c                    | j                  ||      }|dk  r| j                  }| j                  dkD  rH|| j                  k7  r9| j                  r-t        j	                  d| j                   d| d       d| _        | j                  |      }|dk  r| j                  }| j                  dkD  rH|| j                  k7  r9| j                  r-t        j	                  d| j                   d| d       d| _        ||fS )aF  Detect num_heads and hidden_size.

        Args:
            reshape_q (NodeProto): reshape node for Q
            is_torch2 (bool): graph pattern is from PyTorch 2.*
            layernorm_node (NodeProto): LayerNormalization node before Q, K, V
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        r   z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r5   r   r   loggerwarningr;   r   r   )r   r    r9   r!   r   r   s         r   get_num_heads_and_hidden_sizez1FusionAttentionUnet.get_num_heads_and_hidden_size\   s     &&y)<	>I>>A)t~~"=%%0@@TU^T__vwx).&**>:!**KaK43C3C$C'''(8(8'99Mk]Zqr ,1(+%%r   q_matmulk_matmulv_matmulr+   outputNc           
      x   | j                    }|rv|j                  d   |k7  s$|j                  d   |k7  s|j                  d   |k7  rt        j                  d|j                  d   |j                  d   |j                  d          y|j                  d   |k7  s1|j                  d   |j                  d   k7  s|j                  d   |k(  r@t        j                  d|j                  d   |j                  d   |j                  d          y|dkD  r$||z  dk7  rt        j                  d| d|        y| j                  j                  |j                  d         }	| j                  j                  |j                  d         }
| j                  j                  |j                  d         }|	r|
r|sy|	j                  }t        j                  |	      }t        j                  |
      }t        j                  |      }t        j                  d|j                   d	|j                   d
|j                   d|        |r|j                  |j                  k7  s|j                  |j                  k7  ry|j                  d   }|dkD  r||k7  rt        d| d| d      t        t        j                  |j                  dd             }| j                  r| j                  j                  d      }|}|}||z  }t        j                   |j#                  |||      |j#                  |||      |j#                  |||      g      j#                  ||dz  |z        }| j                  j                  dd      }| j%                  |dz   ||j                  d   |j                  d   g|       t'        j(                  d|j                  d   |dz   g|dz   g|      }| j*                  | j,                  |j.                  <   | j%                  |dz   t0        j2                  dgdd|d|gd       t'        j(                  d|dz   |dz   g|dz   g|dz         }| j*                  | j,                  |j.                  <   | j4                  j7                  ||g       | j8                  j7                  |||g       nt        j:                  |||fd      }d|z  }| j                  j                  d       }| j%                  |d!z   |||g|       nM| j                  j                  d      }| j<                  r%|j                  |j                  k7  ry|j                  d   }|j                  d   }||k(  sJ |j                  d   }|j                  d   }|j                  d   }||k(  r||k(  sJ |}|}||z  }t        j                   |j#                  |||      |j#                  |||      g      j#                  ||d"z  |z        }| j                  j                  dd#      }| j%                  |dz   ||j                  d   |j                  d   g|       t'        j(                  d|j                  d   |dz   g|dz   g|      }| j*                  | j,                  |j.                  <   | j%                  |dz   t0        j2                  dgdd|d"|gd       t'        j(                  d|dz   |dz   g|d$z   g|dz         }| j*                  | j,                  |j.                  <   | j4                  j7                  ||g       | j8                  j7                  ||g       t        j>                  d|gt        j@                  %      } d|z  }!| j%                  |d&z   ||!g|        |r| j                  s||d!z   |d&z   g}"nX|dz   g}"nQ| j<                  s1|jB                  d   |jB                  d   |jB                  d   |d&z   g}"n|jB                  d   |d$z   g}"t'        j(                  |r| j                  sd nd|"|g|      }#d'|#_"        |#jF                  j7                  t'        jH                  d(|      g       |r| j                  sd)n,d*jK                  | j                  rd+n| j<                  rd,nd-      }$| jM                  |$       |#S ).  Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input (str): input name
            output (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        r   RFor self attention, input hidden state for q and k/v shall be same. Got %s, %s, %sNXFor cross attention, input hidden state for q and k/v shall be different. Got %s, %s, %sinput hidden size # is not a multiple of num of heads r$   qw= kw= vw= hidden_size=Input hidden size (,) is not same as weight dimension of q,k,v (:). Please provide a correct input hidden size or pass in 0r      MatMul
MatMul_QKVname_prefix_weightname	data_typedimsvals_outinputsoutputsrX   _reshape_shape   FrX   rY   rZ   r[   rawReshape
_qkv_input_reshape)axisr   _qkv_weightr'   	MatMul_KV	_kv_inputdtype	_qkv_biascom.microsoftr   Attention (self attention)MultiHeadAttention ({})self attention with packed qkvcross attention with packed kvcross attention)'r   r+   r=   debugr   r7   rY   r   r8   r1   
ValueErrorr2   r.   prodr   create_node_namedstackreshapeadd_initializerr
   	make_nodethis_graph_namenode_name_to_graph_namerX   r	   INT64nodes_to_addextendnodes_to_removestackr   zerosfloat32rC   domain	attributemake_attributeformatincrease_counter)%r   r@   rA   rB   r   r   r+   rC   is_self_attentionq_weightk_weightv_weight
float_typeqwkwvw
qw_in_sizeqw_out_sizeattention_node_namecnh
qkv_weightmatmul_node_namematmul_nodereshape_nodeqkv_weight_dim
kw_in_size
vw_in_sizekw_out_sizevw_out_size	kv_weightqkv_biasqkv_bias_dimattention_inputsattention_nodecounter_names%                                        r   create_attention_nodez)FusionAttentionUnet.create_attention_node~   s   0 !% 7 77~~a E)X^^A->%-G8>>Z[K\`eKehNN1%NN1%NN1%	 ~~a E)hnnQ.?8>>RSCT.TZbZhZhijZkotZtnNN1%NN1%NN1%	 ?i 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X( ''
!!(+!!(+!!(+s288*D
$rxxjk][\ xx288#rxx288';!JQ;*#< )+6bcmbn oN N  bggbhhqrl34K%%&*jj&A&ABV&W#9,  YY

1a(;RZZ1a=PRTR\R\]^`acdRe'fgooq1uqy
 $(::#>#>xUa#>#b $$)I5($**1-z/?/?/BC#	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=>$$),<<)//Q1a %   &//(61(+;; 1<?@)J6  CGBVBV,,\->->?!!((+|)DE$$++Xx,JK  XXr2rl;
!"[&*jj&A&A+&N#$$,}<($n5#	 %  #'**"="=>R"S$$88rxx'XXa[
XXa[
!Z/// hhqk hhqk hhqk"k1k[6PPP9, IIrzz!Q':BJJq!Q<O&PQYYZ[]^ab]bef]fg	#'::#>#>xU`#>#a $$)I5(#//!,iooa.@A"	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=>$$),<<)//Q1a %   &//(61(+;; 1;>?)J6  CGBVBV,,\->->?!!((+|)DE$$++Xx,@A 88Q,BJJ?;${2 	 	 	
 ))'-7'+5$  %8,$F#G ((OOA&OOA&OOA&'+5	$  OOA&'+5$ 
  ))-d6L6LKSg#H$	
 !0  '')>)>{I)V(WX !)?)? )*11)) 19=9N9N5Te 	 	l+r   q_matmul_addk_matmul_addv_matmul_addc           
          | j                    }| j                  j                  |dd      }	| j                  j                  |dd      }
| j                  j                  |dd      }| j                  |      }|y|\  }}| j                  |      }|y|\  }}| j                  |      }|y|\  }}|r|	j                  d   |k7  s$|
j                  d   |k7  s|j                  d   |k7  r@t
        j                  d|	j                  d   |
j                  d   |j                  d          y|j                  d   |k7  s%|j                  d   |k7  s|j                  d   |k7  rFt
        j                  d|j                  d   |j                  d   |j                  d          y|	j                  d   |k7  s1|
j                  d   |j                  d   k7  s|
j                  d   |k(  r@t
        j                  d|	j                  d   |
j                  d   |j                  d          y|j                  d   |k7  s1|j                  d   |j                  d   k7  s|
j                  d   |k(  r@t
        j                  d|j                  d   |j                  d   |j                  d          y|dkD  r$||z  dk7  rt
        j                  d| d	|        y| j                  j                  |	j                  d
         }| j                  j                  |
j                  d
         }| j                  j                  |j                  d
         }|r|r|sy|j                  dk(  rt
        j                  d       yt        j                  |      }t        j                  |      }t        j                  |      }t
        j                  d|j                   d|j                   d|j                   d|        |r|j                  |j                  k7  s|j                  |j                  k7  ry|j                  d   }|dkD  r||k7  rt        d| d| d      t        t        j                  |j                  d
d             }| j                   rR| j                  j#                  d      }|}|}||z  } t        j$                  |j'                  |||       |j'                  |||       |j'                  |||       g      j'                  ||dz  | z        }!| j                  j#                  dd      }"| j)                  |"dz   t*        j,                  |!j                  d   |!j                  d
   g|!       t/        j0                  d|
j                  d   |"dz   g|"dz   g|"      }#| j2                  | j4                  |#j6                  <   |j6                  dz   }$| j)                  |$t*        j8                  dgdd|| gd       | j                  j#                  d d!      }%t/        j0                  d |j:                  d   |$g|%dz   g|%      }&| j2                  | j4                  |&j6                  <   | j                  j#                  d d"      }'t/        j0                  d |j:                  d   |$g|'dz   g|'      }(| j2                  | j4                  |(j6                  <   | j                  j#                  d d#      })t/        j0                  d |j:                  d   |$g|)dz   g|)      }*| j2                  | j4                  |*j6                  <   | j                  j#                  d$d%      }+t/        j0                  d$|&j:                  d   |(j:                  d   |*j:                  d   g|+dz   g|+      },|,j<                  j?                  t/        j@                  d&d      g       | j2                  | j4                  |,j6                  <   |,j6                  dz   }-| j)                  |-t*        j8                  dgdd|dz  | z  gd       | j                  j#                  d d'      }.t/        j0                  d |,j:                  d   |-g|.dz   g|.      }/| j2                  | j4                  |/j6                  <   | j                  j#                  d(d)      }0t/        j0                  d(|/j:                  d   |#j:                  d   g|0dz   g|0      }1| j2                  | j4                  |1j6                  <   |0dz   }2| j)                  |2t*        j8                  d*gdd|d| gd       t/        j0                  d |1j:                  d   |2g|d+z   g|0d,z         }3| j2                  | j4                  |3j6                  <   | jB                  j?                  |#|&|(|*|,|/|1|3g       | jD                  j?                  |	|
||||g       nIy| j                  j#                  d      }| jF                  r|j                  |j                  k7  ry|j                  d   }4|j                  d   }5|4|5k(  sJ |j                  d
   }|j                  d
   }6|j                  d
   }7||7k(  r|6|7k(  sJ |4}|}|6|z  } t        j$                  |j'                  |||       |j'                  |||       g      j'                  ||d-z  | z        }8| j                  j#                  dd.      }"| j)                  |"dz   t*        j,                  |8j                  d   |8j                  d
   g|8       t/        j0                  d|
j                  d   |"dz   g|"dz   g|"      }#| j2                  | j4                  |#j6                  <   |j6                  dz   }9| j)                  |9t*        j8                  dgdd|| gd       | j                  j#                  d d"      }'t/        j0                  d |j:                  d   |9g|'dz   g|'      }(| j2                  | j4                  |(j6                  <   | j                  j#                  d d#      })t/        j0                  d |j:                  d   |9g|)dz   g|)      }*| j2                  | j4                  |*j6                  <   | j                  j#                  d$d/      }:t/        j0                  d$|(j:                  d   |*j:                  d   g|:dz   g|:      };|;j<                  j?                  t/        j@                  d&d      g       | j2                  | j4                  |;j6                  <   |;j6                  dz   }<| j)                  |<t*        j8                  dgdd|d-z  | z  gd       | j                  j#                  d d0      }=t/        j0                  d |;j:                  d   |<g|=dz   g|=      }>| j2                  | j4                  |>j6                  <   | j                  j#                  d(d1      }?t/        j0                  d(|>j:                  d   |#j:                  d   g|?dz   g|?      }@| j2                  | j4                  |@j6                  <   |?dz   }2| j)                  |2t*        j8                  d*gdd|d-| gd       t/        j0                  d |@j:                  d   |2g|d2z   g|?d,z         }3| j2                  | j4                  |3j6                  <   | jB                  j?                  |#|(|*|;|>|@|3g       | jD                  j?                  |
|||g       nyt        jH                  d|gt        jJ                  3      }Ad|z  }B| j)                  |d4z   t*        j,                  |Bg|A       |r| j                   sy|d+z   g}Cn!| jF                  sy|j:                  d   |d2z   g}Ct/        j0                  |r| j                   sd5ndC|g|      }Dd6|D_&        |Dj<                  j?                  t/        j@                  d7|      g       |r| j                   sd8n,d9jO                  | j                   rd:n| jF                  rd;nd<      }E| jQ                  |E       DS )=rE   rR   r   NrF   z_For self attention, input hidden state for LoRA q and k/v weights shall be same. Got %s, %s, %srG   zeFor cross attention, input hidden state for LoRA q and k/v weights shall be different. Got %s, %s, %srH   rI   r$   
   zBweights are in fp16. Please run fp16 conversion after optimizationrJ   rK   rL   rM   rN   rO   rP   r   rQ   rS   rT   rV   rW   r\   r]   r`   r&   Frb   rd   Reshape_LoRA_QReshape_LoRA_KReshape_LoRA_Vr%   Concat_LoRA_QKVrg   Reshape_LoRA_QKVAddAdd_Weights_QKVra   re   rf   r'   ri   Concat_LoRA_KVReshape_LoRA_KVAdd_Weights_KVrj   rk   rm   r   rn   r   ro   rp   rq   rr   rs   ))r   r   match_parentmatch_lora_pathr+   r=   rt   r7   rY   r   r8   r1   ru   r2   r.   rv   r   rw   rx   ry   rz   r	   FLOATr
   r{   r|   r}   rX   r~   rC   r   r   r   r   r   r   r   r   r   r   r   )Fr   r   r   r   r   r   r+   rC   r   r@   rA   rB   q_lora_nodesq_lora_last_nodeq_lora_matmul_1k_lora_nodesk_lora_last_nodek_lora_matmul_1v_lora_nodesv_lora_last_nodev_lora_matmul_1r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   lora_weight_shape_tensor_nameq_lora_reshape_node_nameq_lora_reshape_nodek_lora_reshape_node_namek_lora_reshape_nodev_lora_reshape_node_namev_lora_reshape_nodeqkv_lora_concat_node_nameqkv_lora_concat_node'reshaped_lora_weights_shape_tensor_nameqkv_lora_reshaped_node_nameqkv_lora_reshaped_nodeadd_weights_node_nameadd_weights_nodeshape_tensor_namer   r   r   r   r   r    kv_lora_weight_shape_tensor_namekv_lora_concat_node_namekv_lora_concat_node*reshaped_kv_lora_weights_shape_tensor_namekv_lora_reshaped_node_namekv_lora_reshaped_nodeadd_kv_weights_node_nameadd_kv_weights_noder   r   r   r   r   sF                                                                         r   create_attention_node_loraz.FusionAttentionUnet.create_attention_node_lora~  s#   0 !% 7 77::**<1E::**<1E::**<1E++L9.:+	?++L9.:+	?++L9.:+	?~~a E)X^^A->%-G8>>Z[K\`eKehNN1%NN1%NN1%	   %%a(E1"((+u4"((+u4u#))!,#))!,#))!,	 ~~a E)hnnQ.?8>>RSCT.TZbZhZhijZkotZtnNN1%NN1%NN1%	   %%a(E1#))!,0E0Ea0HHNN1%.) $))!,#))!,#))!, ?i 7A=LL-k]:]^g]hij::--hnnQ.?@::--hnnQ.?@::--hnnQ.?@X( #LL]^!!(+!!(+!!(+s288*D
$rxxjk][\ xx288#rxx288';!JQ;*#< )+6bcmbn oN N  bggbhhqrl34K%%&*jj&A&ABV&W#9,  YY

1a(;RZZ1a=PRTR\R\]^`acdRe'fgooq1uqy
 $(::#>#>xUa#>#b $$)I5)//$**1-z/?/?/BC#	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=> 1A0E0EHX0X-$$6)//Q1 %  ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68UV5>?1	'# JNI]I],,-@-E-EF -1JJ,G,G^o,G,p)'-'7'7+2215+2215+2215
 7?@2	($ %..55v7L7LVUV7W6XYJNJ^J^,,-A-F-FG ;O:S:SVf:f7$$@)//QA	* %  /3jj.I.I)as.I.t+)/)9)9077:<cd86AB4	*& MQL`L`,,-C-H-HI )-

(C(CEWh(C(i%#)#3#3299!<k>P>PQR>ST2V;<.	$  GKFZFZ,,-=-B-BC %:<L$L!$$*)//Q1a %   &//,33A68IJ0<?@.;	  CGBVBV,,\->->?!!((#+++,.($	 $$++Xx<Yegs,tu "&**"="=>R"S$$88rxx'XXa[
XXa[
!Z/// hhqk hhqk hhqk"k1k[6PPP9, IIrzz!Q':BJJq!Q<O&PQYYZ[]^ab]bef]fg	#'::#>#>xU`#>#a $$)I5)//#//!,iooa.@A"	 %  %..$NN1-/?)/KL-67)	 BFAUAU,,[-=-=> 4D3H3HK[3[0$$9)//Q1 %  ,0::+F+Fy^n+F+o(&,&6&6,33A68XY5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fy^n+F+o(&,&6&6,33A68XY5>?1	'# JNI]I],,-@-E-EF ,0::+F+Fx]m+F+n(&,&6&6/66q9;N;U;UVW;XY5>?1	'# $--44f6K6KFTU6V5WXIMI]I],,-@-E-EF >Q=U=UXh=h:$$C)//QA	* %  .2ZZ-H-H`q-H-r*(.(8(8/66q9;ef7&@A3	)% LPK_K_,,-B-G-GH ,0::+F+FuZj+F+k(&,&6&6188;[=O=OPQ=RS5>?1	'# JNI]I],,-@-E-EF %=?O$O!$$*)//Q1a %   &///66q9;LM0;>?1J>	  CGBVBV,,\->->?!!((#+++-+$
 $$++Xx|,\]  88Q,BJJ?;${2!''	 	 	
 ))$7,$F#G (( !''*'+5$ 
  ))-d6L6LKSg#H$	
 !0  '')>)>{I)V(WX !)?)? )*11)) 19=9N9N5Te 	 	l+r   c           
         | j                  |||      ry | j                  j                  |dd      }|)| j                  s| j                  j                  |dd      }|y |j                  d   }||   }d }|D ]  }|j
                  dk(  s|} n |y | j                  ||      xs | j                  ||      }	|	r|	\  }
}}}}}}|}| j                  |||
      \  }}|dk  rt        j                  d       y | j                  ||||||j                  d   |j                  d         }|y | j                  ||      xs | j                  ||      }	|	y |	\  }
}}}}}}|}| j                  |||
      \  }}|dk  rt        j                  d       y | j                  ||||||j                  d   |j                  d         }|y | j                  |||
      \  }}|dk  rt        j                  d       y | j                  j!                  |       | j"                  | j$                  |j&                  <   | j(                  j+                  ||g       d| _        y )Nr   r   rd   *fuse_attention: failed to detect num_headsr+   rC   T)fuse_a1111_fp16r   r   r   rC   r)   match_qkv_torch1match_qkv_torch2r?   r=   rt   r   match_qkv_torch1_loramatch_qkv_torch2_lorar   r   appendr|   r}   rX   r   r   prune_graph)r   normalize_nodeinput_name_to_nodesoutput_name_to_nodenode_before_layernorm
root_inputchildren_nodesskip_addnode	match_qkvr!   reshape_qkvtranspose_qkvr    matmul_qmatmul_kmatmul_vattention_last_nodeq_num_headsq_hidden_sizenew_nodematmul_add_qmatmul_add_kmatmul_add_vs                           r   fusezFusionAttentionUnet.fuseR  s   0CEXY $

 7 7q Q !(1H1H$(JJ$;$;NIWX$Y! (*11!4
,Z8"D||u$ # ))*h?n4CXCXYcemCn	 ]fZI{M9hRZ"-)-)K)KIWegp)q&KaIJ 11$++A.*11!4 2 H  22:xH DLfLfHMI  irfI{M9lLZf"-)-)K)KIWegp)q&KaIJ 66$++A.*11!4 7 H )-)K)KIWegp)q&KaIJ  *6:6J6J$$X]]3##%8-$HI  r   c           
         |j                   d   |k(  rdnd}| j                  j                  |g d|dddddg      }|y|\  }}}}}}| j                  j                  |g dg d      }	|	t        j	                  d       y|	\  }}}}
| j                  j                  |g dg d	      }||\  }}}nA| j                  j                  |g d
g d      }||\  }}}}nt        j	                  d       y| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}}d||||||
fS )z.Match Q, K and V paths exported by PyTorch 1.*r   r$   )r   rR   rd   	Transposerd   rR   Nrd   r   rd   rR   r$   r   r   r   &fuse_attention: failed to match v pathSoftmaxMulrR   r   r   r   r  r   r  rR   r   r   r   r   'fuse_attention: failed to match qk path&fuse_attention: failed to match q path)r   rd   r   rd   rR   r$   r   r   r   r   &fuse_attention: failed to match k pathFr+   r   match_parent_pathr=   rt   )r   r   r   another_input	qkv_nodes_r   r   
matmul_qkvv_nodesr   qk_nodes_softmax_qk_mul_qk	matmul_qk	_add_zeroq_nodes_transpose_qr    r   k_nodesr   s                         r   r   z$FusionAttentionUnet.match_qkv_torch1  s   %^^A.*<!JJ00JD$1a0
	 <E9A{M1j **..z;hjvw?LLAB%Aq(:://
<XZcd08-['9zz33J@ceqrH#?G<i)FG**..y:giuv?LLAB18.L)X**..QSb
 ?LLAB!(Aq!Xk=)XxQYYYr   c           	         |j                   d   |k(  rdnd}| j                  j                  |g d|ddddg      }|y|\  }}}}}| j                  j                  |g dg d      }	|	t        j	                  d       y|	\  }}}
| j                  j                  |dd	gddg      }||\  }}nt        j	                  d
       y| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }||d   |k7  rt        j	                  d       yd||||||
fS )z.Match Q, K and V paths exported by PyTorch 2.*r   r$   )r   rR   rd   r   rR   N)r   rd   rR   r$   r   r   r   r  rR   r  )r  r   rd   rR   r   Nr   r   r  r$   Nr   r   r	  SqrtDivr  CastSliceShaper   rd   Nr   r$   r   r   r   r   r   z*fuse_attention: failed to match mul_q pathTr
  )r   r   r   r  r  r  r   r   r  r  r   r  r  r  r  mul_qr  r    r   r  _mul_kr   mul_q_nodess                          r   r   z$FusionAttentionUnet.match_qkv_torch2  s   %^^A.*<!JJ00?D$1-
	 9B6A{M:**..z;]_hi?LLAB"Ax:://
Y<QTUWXSYZ'/$[)LLBC**..y:cetu?LLAB5<2i**..y:cetu?LLAB#* Ax jj22U'

 +b/Y">LLEF[-HhPXXXr   c                    |j                   d   |k(  rdnd}| j                  j                  |g d|ddddddg      }|y|\  }}}}}}}| j                  j                  |g dg d      }	|	t        j	                  d       y|	\  }}}}
| j                  j                  |g dg d	      }||\  }}}nA| j                  j                  |g d
g d      }||\  }}}}nt        j	                  d       y| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}}d||||||
fS )zJMatch Q, K and V paths exported by PyTorch 1 that contains LoRA patterns.*r   r$   )r   r   rR   rd   r   rd   rR   N)rd   r   rd   r   r   +fuse_attention: failed to match LoRA v pathr   r  r  r  ,fuse_attention: failed to match LoRA qk path+fuse_attention: failed to match LoRA q path)r   rd   r   rd   r   r  +fuse_attention: failed to match LoRA k pathFr
  )r   r   r   r  r  r  r   r   r  r  r   r  r  r  r  r  r  r  r    r   r  r   s                         r   r   z)FusionAttentionUnet.match_qkv_torch1_lora  s   %^^A.*<!JJ00QAtT1a3
	
 ?H<Aq+}a **..z;egst?LLFG")Aq,:://
<XZcd08-['9zz33J@ceqrH#?G<i)KL**..y:dfrs?LLFG5<2L)\**..NP_
 ?LLFG%,"Aq!\k=)\<Yeeer   c           
          |j                   d   |k(  rdnd}| j                  j                  |g d|dddddg      }|y|\  }}}}}}| j                  j                  |g dg d      }	|	t        j	                  d       y|	\  }}}
| j                  j                  |dd	gddg      }||\  }}nt        j	                  d
       y| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }||d   |k7  rt        j	                  d       yd||||||
fS )zJMatch Q, K and V paths exported by PyTorch 2 that contains LoRA patterns.*r   r$   )r   r   rR   rd   r   rR   N)r   rd   r   r  r)  r  rR   r*  )r  r   rd   r   r  r+  r  r,  r  r#  r$  z/fuse_attention: failed to match LoRA mul_q pathTr
  )r   r   r   r  r  r  r   r   r  r  r   r  r  r  r  r%  r  r    r   r  r&  r   r'  s                          r   r   z)FusionAttentionUnet.match_qkv_torch2_loraA  s   %^^A.*<!JJ00FAtT1a0
	
 <E9Aq+}j**..z;Z\ef?LLFG&A|:://
Y<QTUWXSYZ'/$[)LLGH**..y:`bqr?LLFG9@6i**..y:`bqr?LLFG'.$A| jj22U'

 +b/Y">LLJK[-L,Xdddr   add_nodec                    | j                   j                  |ddgddg      }|	|\  }}||fS | j                   j                  |g dg d      }|
|\  }}}||fS | j                   j                  |g dg d      }||\  }}}}||fS y )NrR   r$   r   )r  rR   rR   r  )r  r  rR   rR   r   )r   r  )r   r.  
lora_nodeslora_matmul_2_nodelora_matmul_1_nodelora_mul_noder  s          r   r   z#FusionAttentionUnet.match_lora_patht  s     ZZ11x F

 !7A4!3&(:;; ZZ11'

 !5?2]A1!#566 ZZ11.

 !8B5]Aq"4!#566r   c           
      D   | j                   j                  |ddgddg      }|$| j                   j                  |ddgddg      }|y|\  }}|j                  d   }||   }d}	|D ]  }
|
j                  dk(  s|
}	 n |	y| j	                  ||	      }|y|\  }}}}}}| j                   j                  |dd      }| j                   j                  |dd      }| j                   j                  |dd      }||| j                  s||k(  rn||k7  r||k(  sy|j                  d   |j                  d   k7  ry|}| j                  |d      xs | j                  |d      }|dk  rt        j                  d       y| j                  |      }| j                  ||||||j                  d   |j                  d   	      }|y| j                  j                  |       | j                  | j                   |j"                  <   | j$                  j'                  ||g       d| _        y)
zPFuse attention of fp16 UNet exported in A1111 (stable diffusion webui) extensionr   r   r   Nrd   FTr   r   )r   r  rC   r)   match_qkv_a1111r   r   r+   r5   r=   rt   r;   r   r   r   r|   r}   rX   r   r   r   )r   r   r   r   
entry_path_castr   r   r   r   r   r   r   r   r    r   r   r   cast_qcast_kcast_vr   r   r   r   s                            r   r   z#FusionAttentionUnet.fuse_a1111_fp16  sm   ZZ11.65/TUWXSYZ
55nvyFY\]_`[abJ!'1$$*11!4
,Z8"D||u$ # ((X>	 	
 ((61=((61=((61=")-)@)@6!fPVFV& <<?n33A66)((D9aT=O=OPY[`=a!LLEF,,^< --..#&--a0 . 
   *6:6J6J$$X]]3##%8-$HI  r   c           
         |j                   d   |k(  rdnd}| j                  j                  |g d|dddddg      }|y|\  }}}}}}	| j                  j                  |	g dg d      }
|
t        j	                  d       y|
\  }}}}| j                  j                  |	g dg d	      }|	|\  }}}}}nt        j	                  d
       y| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}| j                  j                  |g dg d      }|t        j	                  d       y|\  }}}}||||||fS )zKMatch Q, K and V paths exported by A1111 (stable diffusion webui) extensionr   r$   )r   rR   rd   r   rd   EinsumNr   r   r   )r   r   r  r  r<  )r   r   r   r   Nr  r  r  r	  r
  )r   r   r   r  r  r  r   r   reshape_einsum
einsum_qkvr  r   r  r  	einsum_qkr  r  r    r   r  r   s                        r   r5  z#FusionAttentionUnet.match_qkv_a1111  so   %^^A.*<!JJ00JD$1a0
	 IRFA{M>:**..z;hjvw?LLAB%Aq(:://DFX
 08-Q;9LLBC**..y:giuv?LLAB18.L)X**..y:giuv?LLAB%Aq(M9h(RRr   )F)__name__
__module____qualname____doc__r   r2   boolr   r   r5   r;   r   r?   strr   r   r   r   r   r   r   r   r   r   r5  __classcell__)r   s   @r   r   r      s   (( ( 	(
 !(  ( (:y T c 8 RW &" &4= &JN &	sCx &D~~ ~ 	~
 ~ ~ ~ ~ 
y$	~@RR  R  	R
 R R R R 
y$	RhX t/Zb2Yh.f`1ef**XL\*Sr   r   )loggingr   typingr   r   numpyr.   fusion_baser   fusion_utilsr   onnxr   r	   r
   
onnx_modelr   r@  r=   r    r   r   <module>rO     s9   
     $ / /  	8	GS& GSr   