
from logging import getLogger
from typing import Tuple, Union

import numpy as np
from fusion_base import Fusion
from onnx import NodeProto, TensorProto, helper, numpy_helper
from onnx_model import OnnxModel

logger = getLogger(__name__)


class FusionAttentionVae(Fusion):
    """
    Fuse Attention subgraph of Vae Decoder into one Attention node.
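
    The matched subgraph is the self-attention block of the VAE decoder: Q, K and V each
    come from a MatMul + Add pair, Q x K^T is scaled (Mul), passed through an Add and a
    Softmax, multiplied with V, then reshaped and transposed back to the original layout.
    The whole subgraph is replaced by a single com.microsoft Attention node with packed
    QKV weight and bias initializers.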
    """

    def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int):
        super().__init__(model, "Attention", ["Softmax"])
        self.hidden_size = hidden_size
        self.num_heads = num_heads

        # Flags so that the detected-value warnings below are only logged once.
        self.num_heads_warning = True
        self.hidden_size_warning = True

    def get_num_heads_and_hidden_size(self, reshape_q: NodeProto, add_q: NodeProto) -> Tuple[int, int]:
        """Detect num_heads and hidden_size from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
            add_q (NodeProto): add node for Q

        Returns:
            Tuple[int, int]: num_heads and hidden_size
        """
        concat = self.model.get_parent(reshape_q, 1)
        if concat is None or len(concat.input) != 4:
            return self.num_heads, self.hidden_size

        # The shape input of the Reshape is a Concat like [?, ?, num_heads, head_size];
        # the third element gives the number of heads.
        value = self.model.get_constant_value(concat.input[2])
        if value is None or not isinstance(value, np.ndarray) or value.size != 1:
            return self.num_heads, self.hidden_size

        num_heads = int(value)
        if num_heads <= 0:
            return self.num_heads, self.hidden_size

        # Hidden size is the length of the bias added to the Q projection.
        _, bias = self.model.get_constant_input(add_q)
        if bias is None or not isinstance(bias, np.ndarray) or bias.ndim != 1:
            return self.num_heads, self.hidden_size

        hidden_size = bias.shape[0]

        if self.num_heads > 0 and num_heads != self.num_heads and self.num_heads_warning:
            logger.warning("Detected number of attention heads is %d. Ignore --num_heads %d", num_heads, self.num_heads)
            self.num_heads_warning = False  # Do not show the warning more than once

        if self.hidden_size > 0 and hidden_size != self.hidden_size and self.hidden_size_warning:
            logger.warning("Detected hidden size is %d. Ignore --hidden_size %d", hidden_size, self.hidden_size)
            self.hidden_size_warning = False  # Do not show the warning more than once

        return num_heads, hidden_size

    def create_attention_node(
        self,
        q_matmul: NodeProto,
        q_add: NodeProto,
        k_matmul: NodeProto,
        k_add: NodeProto,
        v_matmul: NodeProto,
        v_add: NodeProto,
        num_heads: int,
        hidden_size: int,
        input_name: str,
        output_name: str,
    ) -> Union[NodeProto, None]:
        """Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            input_name (str): input name
            output_name (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        """
        if q_matmul.input[0] != input_name or k_matmul.input[0] != input_name or v_matmul.input[0] != input_name:
            logger.debug(
                "For self attention, input hidden state for q and k/v shall be same. Got %s, %s, %s",
                q_matmul.input[0],
                k_matmul.input[0],
                v_matmul.input[0],
            )
            return None

        if hidden_size > 0 and (hidden_size % num_heads) != 0:
            logger.debug("input hidden size %d is not a multiple of num of heads %d", hidden_size, num_heads)
            return None

        q_weight_tensor = self.model.get_initializer(q_matmul.input[1])
        k_weight_tensor = self.model.get_initializer(k_matmul.input[1])
        v_weight_tensor = self.model.get_initializer(v_matmul.input[1])
        if not (q_weight_tensor and k_weight_tensor and v_weight_tensor):
            return None

        # The bias initializer can be on either input of the Add node.
        q_bias_tensor = self.model.get_initializer(q_add.input[1]) or self.model.get_initializer(q_add.input[0])
        k_bias_tensor = self.model.get_initializer(k_add.input[1]) or self.model.get_initializer(k_add.input[0])
        v_bias_tensor = self.model.get_initializer(v_add.input[1]) or self.model.get_initializer(v_add.input[0])

        q_bias = numpy_helper.to_array(q_bias_tensor)
        k_bias = numpy_helper.to_array(k_bias_tensor)
        v_bias = numpy_helper.to_array(v_bias_tensor)

        q_bias_shape = np.prod(q_bias.shape)
        k_bias_shape = np.prod(k_bias.shape)
        v_bias_shape = np.prod(v_bias.shape)

        if q_weight_tensor.data_type == 10:  # TensorProto.FLOAT16
            logger.debug("weights are in fp16. Please run fp16 conversion after optimization")
            return None

        q_weight = numpy_helper.to_array(q_weight_tensor)
        k_weight = numpy_helper.to_array(k_weight_tensor)
        v_weight = numpy_helper.to_array(v_weight_tensor)

        # The Q, K and V projection weights shall have the same shape.
        if q_weight.shape != k_weight.shape or q_weight.shape != v_weight.shape:
            return None

        qw_in_size = q_weight.shape[0]
        kw_in_size = k_weight.shape[0]
        vw_in_size = v_weight.shape[0]

        assert qw_in_size == kw_in_size and kw_in_size == vw_in_size

        if hidden_size > 0 and hidden_size != qw_in_size:
            raise ValueError(
                f"Input hidden size ({hidden_size}) is not same as weight dimension of q,k,v ({qw_in_size}). "
                "Please provide a correct input hidden size or pass in 0"
            )

        qw_out_size = np.prod(q_weight.shape[1:])

        # Pack the three projection weights into one [in_size, 3 * out_size] initializer,
        # and the three biases into one [3 * out_size] initializer.
        qkv_weight = np.stack((q_weight, k_weight, v_weight), axis=1)
        qkv_weight_dim = 3 * int(qw_out_size)

        attention_node_name = self.model.create_node_name("Attention")

        assert q_bias_shape == k_bias_shape == v_bias_shape

        qkv_bias = np.stack((q_bias, k_bias, v_bias), axis=0)
        qkv_bias_dim = 3 * int(q_bias_shape)

        self.add_initializer(
            name=attention_node_name + "_qkv_weight",
            data_type=TensorProto.FLOAT,
            dims=[qw_in_size, qkv_weight_dim],
            vals=qkv_weight,
        )

        self.add_initializer(
            name=attention_node_name + "_qkv_bias",
            data_type=TensorProto.FLOAT,
            dims=[qkv_bias_dim],
            vals=qkv_bias,
        )

        attention_inputs = [
            input_name,
            attention_node_name + "_qkv_weight",
            attention_node_name + "_qkv_bias",
        ]

        attention_node = helper.make_node(
            "Attention",
            inputs=attention_inputs,
            outputs=[output_name],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        self.increase_counter("Attention (self attention)")
        return attention_node

    def fuse(self, softmax_node, input_name_to_nodes, output_name_to_node):
        # Walk down from the Softmax to the nodes that close the attention subgraph.
        matmul_qkv = self.model.find_first_child_by_type(softmax_node, "MatMul", input_name_to_nodes, recursive=False)
        if matmul_qkv is None:
            return

        reshape_qkv = self.model.find_first_child_by_type(matmul_qkv, "Reshape", input_name_to_nodes, recursive=False)
        if reshape_qkv is None:
            return

        transpose_qkv = self.model.find_first_child_by_type(reshape_qkv, "Transpose", input_name_to_nodes, recursive=False)
        if transpose_qkv is None:
            return

        reshape_out = self.model.find_first_child_by_type(transpose_qkv, "Reshape", input_name_to_nodes, recursive=False)
        if reshape_out is None:
            return

        matmul_out = self.model.find_first_child_by_type(reshape_out, "MatMul", input_name_to_nodes, recursive=False)
        if matmul_out is None:
            return

        add_out = self.model.find_first_child_by_type(matmul_out, "Add", input_name_to_nodes, recursive=False)
        if add_out is None:
            return

        transpose_out = self.model.find_first_child_by_type(add_out, "Transpose", input_name_to_nodes, recursive=False)
        if transpose_out is None:
            return

        v_nodes = self.model.match_parent_path(
            matmul_qkv, ["Reshape", "Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, 0, None]
        )
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return
        (_, _, _, add_v, matmul_v) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "Add", "Mul", "MatMul"], [0, 0, 0, 0])
        if qk_nodes is None:
            logger.debug("fuse_attention: failed to match qk path")
            return
        (_softmax_qk, _add_zero, _mul_qk, matmul_qk) = qk_nodes

        q_nodes = self.model.match_parent_path(
            matmul_qk, ["Reshape", "Transpose", "Reshape", "Add", "MatMul"], [0, 0, 0, 0, None]
        )
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return
        (_, _transpose_q, reshape_q, add_q, matmul_q) = q_nodes

        k_nodes = self.model.match_parent_path(
            matmul_qk, ["Transpose", "Reshape", "Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, 0, 0, None]
        )
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return
        (_, _, _, _, add_k, matmul_k) = k_nodes

        attention_last_node = reshape_out

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q, add_q)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return

        # The number of heads is the same for the q, k and v paths, so the value detected from q is used.
        new_node = self.create_attention_node(
            matmul_q,
            add_q,
            matmul_k,
            add_k,
            matmul_v,
            add_v,
            q_num_heads,
            q_hidden_size,
            input_name=matmul_q.input[0],
            output_name=attention_last_node.output[0],
        )
        if new_node is None:
            return

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([attention_last_node, transpose_qkv])

        # Use prune_graph to remove the remaining nodes of the old subgraph, since they may be shared.
        self.prune_graph = True
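

# ---------------------------------------------------------------------------
# Illustrative sketch (not part of the original fusion logic): shows, with toy
# shapes, the packed QKV weight/bias layout that create_attention_node builds
# for the com.microsoft Attention op. The function name and shapes below are
# made up for the example only.
# ---------------------------------------------------------------------------
def _qkv_packing_example(in_size: int = 8, out_size: int = 8):
    """Return a packed weight of shape [in_size, 3 * out_size] and a bias of shape [3 * out_size]."""
    rng = np.random.default_rng(0)
    q_weight = rng.standard_normal((in_size, out_size), dtype=np.float32)
    k_weight = rng.standard_normal((in_size, out_size), dtype=np.float32)
    v_weight = rng.standard_normal((in_size, out_size), dtype=np.float32)
    q_bias = np.zeros(out_size, dtype=np.float32)
    k_bias = np.zeros(out_size, dtype=np.float32)
    v_bias = np.zeros(out_size, dtype=np.float32)

    # Stacking along a new axis 1 gives [in_size, 3, out_size]; flattened per row this is
    # exactly Q | K | V concatenated along the output dimension.
    qkv_weight = np.stack((q_weight, k_weight, v_weight), axis=1).reshape(in_size, 3 * out_size)
    qkv_bias = np.stack((q_bias, k_bias, v_bias), axis=0).reshape(3 * out_size)

    assert np.array_equal(qkv_weight, np.concatenate((q_weight, k_weight, v_weight), axis=1))
    return qkv_weight, qkv_bias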