
# -------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation.  All rights reserved.
# Licensed under the MIT License.
# --------------------------------------------------------------------------
from logging import getLogger
from typing import Tuple, Union

import numpy as np
from fusion_base import Fusion
from fusion_utils import NumpyHelper
from onnx import NodeProto, helper, numpy_helper
from onnx_model import OnnxModel

logger = getLogger(__name__)


class FusionMultiHeadAttentionSam2(Fusion):
    """
    Fuse MultiHeadAttention subgraph of Segment Anything v2 (SAM2).
    """

    def __init__(self, model: OnnxModel, hidden_size: int, num_heads: int):
        super().__init__(model, "MultiHeadAttention", ["LayerNormalization"])

        self.hidden_size = hidden_size
        self.num_heads = num_heads

        # Flags to show warning only once.
        self.num_heads_warning = True
        self.hidden_size_warning = True

    def get_decoder_num_heads(self, reshape_q: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        # The shape of the Reshape node for Q is like [0, 0, num_heads, head_size].
        shape_value = self.model.get_constant_value(reshape_q.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [4]:
                num_heads = int(shape_value[2])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0

    def get_encoder_num_heads(self, reshape_in: NodeProto) -> int:
        """Detect num_heads from a reshape node.

        Args:
            reshape_in (NodeProto): reshape node before the Split of Q, K and V
        Returns:
            int: num_heads, or 0 if not found
        """
        num_heads = 0

        # The shape of the Reshape node is like [batch, sequence, 3, num_heads, head_size].
        shape_value = self.model.get_constant_value(reshape_in.input[1])
        if shape_value is not None:
            if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [5]:
                num_heads = int(shape_value[3])
        else:
            concat_shape = self.model.match_parent(reshape_in, "Concat", 1)
            if concat_shape is not None and len(concat_shape.input) == 5:
                # The shape is a Concat of 5 elements, and num_heads is the 4th element.
                shape_value = self.model.get_constant_value(concat_shape.input[3])
                if shape_value is not None:
                    if isinstance(shape_value, np.ndarray) and list(shape_value.shape) == [1]:
                        num_heads = int(shape_value[0])

        if isinstance(num_heads, int) and num_heads > 0:
            return num_heads

        return 0

    def get_hidden_size(self, layernorm_node):
        """Detect hidden_size from LayerNormalization node.

        Args:
            layernorm_node (NodeProto): LayerNormalization node before Q, K and V
        Returns:
            int: hidden_size, or 0 if not found
        """
        layernorm_bias = self.model.get_initializer(layernorm_node.input[2])
        if layernorm_bias:
            return NumpyHelper.to_array(layernorm_bias).shape[0]

        return 0

    def get_num_heads_and_hidden_size(
        self, reshape_q: NodeProto, layernorm_node: NodeProto, is_encoder: bool = False
    ) -> Tuple[int, int]:
        """Detect num_heads and hidden_size.

        Args:
            reshape_q (NodeProto): reshape node for Q
            layernorm_node (NodeProto): LayerNormalization node before Q, K, V
        Returns:
            Tuple[int, int]: num_heads and hidden_size
        """
        if is_encoder:
            num_heads = self.get_encoder_num_heads(reshape_q)
        else:
            num_heads = self.get_decoder_num_heads(reshape_q)
        if num_heads <= 0:
            num_heads = self.num_heads  # Fall back to the user specified value.

        if self.num_heads > 0 and num_heads != self.num_heads:
            if self.num_heads_warning:
                logger.warning(
                    f"--num_heads is {self.num_heads}. Detected value is {num_heads}. Using detected value."
                )
                self.num_heads_warning = False  # Do not show the warning more than once.

        hidden_size = self.get_hidden_size(layernorm_node)
        if hidden_size <= 0:
            hidden_size = self.hidden_size  # Fall back to the user specified value.

        if self.hidden_size > 0 and hidden_size != self.hidden_size:
            if self.hidden_size_warning:
                logger.warning(
                    f"--hidden_size is {self.hidden_size}. Detected value is {hidden_size}. Using detected value."
                )
                self.hidden_size_warning = False  # Do not show the warning more than once.

        return num_heads, hidden_size

    def create_attention_node(
        self,
        q_matmul: NodeProto,
        q_add: NodeProto,
        k_matmul: NodeProto,
        k_add: NodeProto,
        v_matmul: NodeProto,
        v_add: NodeProto,
        num_heads: int,
        hidden_size: int,
        output: str,
    ) -> Union[NodeProto, None]:
        """Create an Attention node.

        Args:
            q_matmul (NodeProto): MatMul node in fully connection for Q
            q_add (NodeProto): Add bias node in fully connection for Q
            k_matmul (NodeProto): MatMul node in fully connection for K
            k_add (NodeProto): Add bias node in fully connection for K
            v_matmul (NodeProto): MatMul node in fully connection for V
            v_add (NodeProto): Add bias node in fully connection for V
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.
            hidden_size (int): hidden dimension. If a model is pruned, it is the hidden dimension after pruning.
            output (str): output name

        Returns:
            Union[NodeProto, None]: the node created or None if failed.
        """
        if hidden_size > 0 and (hidden_size % num_heads) != 0:
            logger.debug(f"input hidden size {hidden_size} is not a multiple of num of heads {num_heads}")
            return None

        q_weight = self.model.get_initializer(q_matmul.input[1])
        k_weight = self.model.get_initializer(k_matmul.input[1])
        v_weight = self.model.get_initializer(v_matmul.input[1])
        if not (q_weight and k_weight and v_weight):
            return None

        qw = NumpyHelper.to_array(q_weight)
        kw = NumpyHelper.to_array(k_weight)
        vw = NumpyHelper.to_array(v_weight)
        logger.debug(f"qw={qw.shape} kw={kw.shape} vw={vw.shape} hidden_size={hidden_size}")

        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        attention_inputs = [
            q_add.output[0],
            k_add.output[0],
            v_add.output[0],
        ]

        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=attention_inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("cross attention")
        self.increase_counter(counter_name)
        return attention_node

    def fuse(self, normalize_node, input_name_to_nodes, output_name_to_node):
        if self.fuse_sam_encoder_pattern(normalize_node, input_name_to_nodes, output_name_to_node):
            return

        match_qkv = self.match_attention_subgraph(normalize_node)
        if match_qkv is None:
            if normalize_node.input[0] not in output_name_to_node:
                return
            skip_add = output_name_to_node[normalize_node.input[0]]
            if skip_add.op_type != "Add":
                return
            match_qkv = self.match_attention_subgraph(skip_add)
            if match_qkv is None:
                return

        reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v = match_qkv

        attention_last_node = reshape_qkv

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_q, normalize_node, False)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return

        # The number of heads is the same for the Q, K and V paths.
        new_node = self.create_attention_node(
            matmul_q,
            add_q,
            matmul_k,
            add_k,
            matmul_v,
            add_v,
            q_num_heads,
            q_hidden_size,
            output=attention_last_node.output[0],
        )
        if new_node is None:
            return

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([attention_last_node, transpose_qkv])

        # Use prune graph to remove the remaining nodes of the replaced subgraph.
        self.prune_graph = True

    def match_attention_subgraph(self, node_after_output_projection):
        """Match Q, K and V paths exported by PyTorch 2.*"""
        qkv_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [None, None, None, 0, 0],
        )

        if qkv_nodes is None:
            return None

        (_, _, reshape_qkv, transpose_qkv, matmul_qkv) = qkv_nodes

        v_nodes = self.model.match_parent_path(matmul_qkv, ["Transpose", "Reshape", "Add", "MatMul"], [1, 0, 0, None])
        if v_nodes is None:
            logger.debug("fuse_attention: failed to match v path")
            return None
        (_, _, add_v, matmul_v) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qkv, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is not None:
            (_softmax_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("fuse_attention: failed to match qk path")
            return None

        q_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [0, None, 0, 0, None]
        )
        if q_nodes is None:
            logger.debug("fuse_attention: failed to match q path")
            return None
        (mul_q, _transpose_q, reshape_q, add_q, matmul_q) = q_nodes

        k_nodes = self.model.match_parent_path(
            matmul_qk, ["Mul", "Transpose", "Reshape", "Add", "MatMul"], [1, None, 0, 0, None]
        )
        if k_nodes is None:
            logger.debug("fuse_attention: failed to match k path")
            return None
        (_, _, _, add_k, matmul_k) = k_nodes

        # The scalar that multiplies both Q and K is sqrt(1.0 / sqrt(head_size)).
        mul_q_nodes = self.model.match_parent_path(
            mul_q,
            ["Sqrt", "Div", "Sqrt", "Cast", "Slice", "Shape", "Transpose", "Reshape"],
            [None, 0, 1, 0, 0, 0, 0, 0],
        )
        if mul_q_nodes is None or mul_q_nodes[-1] != reshape_q:
            logger.debug("fuse_attention: failed to match mul_q path")
            return None

        return reshape_qkv, transpose_qkv, reshape_q, matmul_q, add_q, matmul_k, add_k, matmul_v, add_v

    def fuse_sam_encoder_pattern(self, normalize_node, input_name_to_nodes, output_name_to_node) -> bool:
        # In the SAM2 image encoder, the residual Add before the next LayerNormalization
        # might be preceded by window un-partitioning (Reshape -> Transpose -> Reshape,
        # with optional Slice nodes when padding was added).
        nodes = self.model.match_parent_path(
            normalize_node,
            ["Add", "Slice", "Slice", "Reshape", "Transpose", "Reshape"],
            [0, None, 0, 0, 0, 0],
        )
        if nodes is None:
            nodes = self.model.match_parent_path(
                normalize_node,
                ["Add", "Reshape", "Transpose", "Reshape"],
                [0, None, 0, 0],
            )
            if nodes is None:
                nodes = self.model.match_parent_path(normalize_node, ["Add"], [0])
                if nodes is None:
                    return False

        node_after_output_projection = nodes[-1]
        matched_sdpa = self.match_sam_encoder_attention_subgraph(
            node_after_output_projection, input_index=1 if len(nodes) == 1 else None
        )
        if matched_sdpa is None:
            return False

        (reshape_out, transpose_out, split_qkv, transpose_q, transpose_k, transpose_v) = matched_sdpa

        # Q and V are transposed from BSNH to BNSH, while K is transposed to BNHS for Q*K'.
        permutation_q = OnnxModel.get_node_attribute(transpose_q, "perm")
        if isinstance(permutation_q, list) and permutation_q != [0, 2, 1, 3]:
            return False

        permutation_k = OnnxModel.get_node_attribute(transpose_k, "perm")
        if isinstance(permutation_k, list) and permutation_k != [0, 2, 3, 1]:
            return False

        permutation_v = OnnxModel.get_node_attribute(transpose_v, "perm")
        if isinstance(permutation_v, list) and permutation_v != [0, 2, 1, 3]:
            return False

        input_projection_nodes = self.model.match_parent_path(
            split_qkv,
            ["Reshape", "Add", "MatMul"],
            [0, 0, None],
        )
        if input_projection_nodes is None:
            return False
        (reshape_in, _add_in, _matmul_in) = input_projection_nodes

        q_num_heads, q_hidden_size = self.get_num_heads_and_hidden_size(reshape_in, normalize_node, True)
        if q_num_heads <= 0:
            logger.debug("fuse_attention: failed to detect num_heads")
            return False

        # Add a Reshape node to convert Q from BSNH to BSD, the query format of MultiHeadAttention.
        new_dims_name = "bsnh_to_bsd_reshape_dims"
        new_dims = self.model.get_initializer(new_dims_name)
        if new_dims is None:
            new_dims = numpy_helper.from_array(np.array([0, 0, -1], dtype="int64"), name=new_dims_name)
            self.model.add_initializer(new_dims, self.this_graph_name)

        reshape_q_name = self.model.create_node_name("Reshape")
        reshape_q = helper.make_node(
            "Reshape",
            inputs=[transpose_q.input[0], new_dims_name],
            outputs=[transpose_q.input[0] + "_BSD"],
            name=reshape_q_name,
        )
        self.nodes_to_add.append(reshape_q)
        self.node_name_to_graph_name[reshape_q.name] = self.this_graph_name

        # The Transpose node of Q is not needed anymore. Reuse it to convert K from BSNH to BNSH.
        transpose_k_bnsh = transpose_q
        transpose_k_bnsh.input[0] = transpose_k.input[0]
        transpose_k_bnsh.output[0] = transpose_k.input[0] + "_BNSH"

        logger.debug(f"fuse_sam_encoder_attention: num_heads={q_num_heads}, hidden_size={q_hidden_size}")

        new_node = self.create_mha_node(reshape_q, transpose_k_bnsh, transpose_v, q_num_heads)
        if new_node is None:
            return False

        # MultiHeadAttention outputs BSD directly, so the output Transpose (BNSH to BSNH)
        # is not needed. Redirect the output Reshape node to consume the fused output.
        assert len(self.model.get_children(transpose_out, input_name_to_nodes)) == 1
        reshape_out.input[0] = new_node.output[0]

        self.nodes_to_add.append(new_node)
        self.node_name_to_graph_name[new_node.name] = self.this_graph_name

        self.nodes_to_remove.extend([transpose_out])
        self.prune_graph = True
        return True

    def match_sam_encoder_attention_subgraph(self, node_after_output_projection, input_index):
        """Match SDPA pattern in SAM2 encoder."""
        # Match the output projection (MatMul + Add bias) and the nodes after SDPA.
        out_nodes = self.model.match_parent_path(
            node_after_output_projection,
            ["Add", "MatMul", "Reshape", "Transpose", "MatMul"],
            [input_index, None, None, 0, 0],
        )
        if out_nodes is None:
            return None
        (_, _, reshape_out, transpose_out, matmul_qk_v) = out_nodes

        v_nodes = self.model.match_parent_path(
            matmul_qk_v, ["Transpose", "Squeeze", "Split", "Reshape"], [1, 0, 0, 0]
        )
        if v_nodes is None:
            logger.debug("failed to match v path")
            return None
        (transpose_v, _squeeze_v, split_qkv, _) = v_nodes

        qk_nodes = self.model.match_parent_path(matmul_qk_v, ["Softmax", "MatMul"], [0, 0])
        if qk_nodes is not None:
            (_softmax_qk, matmul_qk) = qk_nodes
        else:
            logger.debug("failed to match qk path")
            return None

        # The Q path may contain pooling (MaxPool) in some layers.
        q_nodes = self.model.match_parent_path(
            matmul_qk,
            ["Mul", "Transpose", "Reshape", "Transpose", "MaxPool", "Transpose", "Reshape", "Squeeze", "Split"],
            [0, None, 0, 0, 0, 0, 0, 0, 0],
        )
        if q_nodes is None:
            q_nodes = self.model.match_parent_path(
                matmul_qk, ["Mul", "Transpose", "Squeeze", "Split"], [0, None, 0, 0]
            )
            if q_nodes is None:
                logger.debug("failed to match q path")
                return None
        if q_nodes[-1] != split_qkv:
            return None
        transpose_q = q_nodes[1]

        k_nodes = self.model.match_parent_path(matmul_qk, ["Mul", "Transpose", "Squeeze", "Split"], [1, None, 0, 0])
        if k_nodes is None:
            logger.debug("failed to match k path")
            return None
        if k_nodes[-1] != split_qkv:
            return None
        (_mul_k, transpose_k, _squeeze_k, _) = k_nodes

        return reshape_out, transpose_out, split_qkv, transpose_q, transpose_k, transpose_v

    def create_mha_node(
        self,
        reshape_q: NodeProto,
        transpose_k: NodeProto,
        transpose_v: NodeProto,
        num_heads: int,
    ) -> NodeProto:
        """Create a MultiHeadAttention node for SAM2 encoder.

        Args:
            reshape_q (NodeProto): Reshape node for Q, output is 3D BxSxNH format
            transpose_k (NodeProto): Transpose node for K, output is BNSH format
            transpose_v (NodeProto): Transpose node for V, output is BNSH format
            num_heads (int): number of attention heads. If a model is pruned, it is the number of heads after pruning.

        Returns:
            NodeProto: the MultiHeadAttention node created.
        """
        attention_node_name = self.model.create_node_name("MultiHeadAttention")

        inputs = [
            reshape_q.output[0],
            transpose_k.output[0],
            transpose_v.output[0],
        ]

        output = attention_node_name + "_out"

        attention_node = helper.make_node(
            "MultiHeadAttention",
            inputs=inputs,
            outputs=[output],
            name=attention_node_name,
        )
        attention_node.domain = "com.microsoft"
        attention_node.attribute.extend([helper.make_attribute("num_heads", num_heads)])

        counter_name = "MultiHeadAttention ({})".format("self attention")
        self.increase_counter(counter_name)

        return attention_node