
    gfA                         d dl mZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZmZ d dlmZmZ d dlmZ  ee      Z G d	 d
e	      Zy)    )	getLogger)TupleN)AttentionMask)Fusion)FusionUtilsNumpyHelper)	NodeProtohelper)	OnnxModelc                   P     e Zd Zdedededef fdZdedeeef   fdZ	d	 Z
 xZS )
FusionQOrderedAttentionmodelhidden_size	num_headsattention_maskc                 R    || _         || _        || _        t        |   |dd       y )NQOrderedAttentionQOrderedLayerNormalization)r   r   r   super__init__)selfr   r   r   r   	__class__s        g/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_qordered_attention.pyr   z FusionQOrderedAttention.__init__   s/     '", 35QR    	reshape_qreturnc                    | j                   j                  |j                  d         }|t        j	                  |j                  d    d       | j                   j                  |dgdg      }|| j                  | j                  fS |d   }t        |j                        dk7  r| j                  | j                  fS |j                  d   j                  }t        j                  |      }t        |      dk7  s|d   dk  s|d   dk  r1t        j	                  d| d	       | j                  | j                  fS |d   }|d   }||z  }| j                  dkD  rH|| j                  k7  r9| j                  r-t        j                  d
| j                   d| d       d| _        | j                  dkD  rH|| j                  k7  r9| j                  r-t        j                  d| j                   d| d       d| _        ||fS )zDetect num_heads and hidden_size from a reshape node.
        Args:
            reshape_q (NodeProto): reshape node for Q
        Returns:
            Tuple[int, int]: num_heads and hidden_size
           z is not initializer.Constantr            zq_shape_value=z7. Expected value are like [0, 0, num_heads, head_size].z--num_heads is z. Detected value is z. Using detected value.Fz--hidden_size is )r   get_initializerinputloggerdebugmatch_parent_pathr   r   len	attributetr   to_arraynum_heads_warningwarninghidden_size_warning)r   r   q_shapeconstant_nodeq_shape_valuer   	head_sizer   s           r   get_num_heads_and_hidden_sizez5FusionQOrderedAttention.get_num_heads_and_hidden_size!   s    **,,Y__Q-?@?LLIOOA.//CDE !JJ88ZLSTRUVM$~~t'7'777 -a 0}../14>>4+;+;;; (11!466#,,W5}"}Q'71'<a@PTU@ULL>-8opq>>4#3#333!!$	!!$	)+>>A)t~~"=%%0@@TU^T__vwx).&aK43C3C$C'''(8(8'99Mk]Zqr ,1(+%%r   c                 ~   | j                   j                  |ddgddg      }||d   }ny | j                   j                  |dgd g      }|t        j                  d       y |d   }| j                   j                  |g dg d      }|t        j                  d	       y |\  }}	}
}}}}t	        j
                  || j                         sy t	        j
                  || j                         sy g }t        |j                        D ]1  \  }}||vr||d   j                  d   k(  r!|j                  |       3 t        |      d
k7  ry |d   }| j                   j                  |g dg d      }|t        j                  d       y |\  }}}}}}t	        j
                  || j                         sy t	        j
                  || j                         sy | j                   j                  |dgd
g      }|t        j                  d       y |d   }| j                   j                  |j                  d         y t	        j
                  || j                   d      sy | j                   j                  |g dg d      }|t        j                  d       y |\  }}}}}}} }!t	        j
                  || j                         sy t	        j
                  || j                         sy t	        j
                  | | j                         sy t	        j
                  || j                         sy | j                   j                  |!g dg d      }"|"t        j                  d       y |"\  }}#}$}%}&}'t	        j
                  |%| j                         sy t	        j
                  |$| j                         sy | j                   j                  |'dgd
g      }(|(t        j                  d       y |(d   }(| j                   j                  |(j                  d         y t	        j
                  |(| j                   d      sy | j                   j                  |!g dg d      })|)t        j                  d       y |)\  }}}*}+},}-t	        j
                  |+| j                         sy t	        j
                  |*| j                         sy | j                   j                  |-dgd
g      }.|.t        j                  d       y |.d   }.| j                   j                  |.j                  d         y t	        j
                  |.| j                   d      sy | j                   j                  |g dg d      }/|/t        j                  d       y | j                   j                  |(j                  d         }0| j                   j                  |.j                  d         }1| j                   j                  |j                  d         }2t        j                  |0      }3t        j                  |1      }4t        j                  |2      }5t        j                   |3j"                  d
d        }6t        j                   |4j"                  d
d        }7t        j                   |5j"                  d
d        }8|j                  d   |k(  r|'j                  d   |k(  r|-j                  d   |k(  ry| j$                  j'                  |/d   j                  d         }9| j)                  |#      \  }:};|j                  d   g}<|<j                  |j                  d
          |<j                  |$j                  d
          |<j                  |*j                  d
          |<j                  |j                  d
          |<j                  |(j                  d          |<j                  |.j                  d          |<j                  |j                  d          |<j                  |(j                  d
          |<j                  |.j                  d
          |<j                  |j                  d
          | j                   j                  |&j                  d         r|<j                  |&j                  d          n|<j                  |&j                  d
          | j                   j                  |,j                  d         r|<j                  |,j                  d          n|<j                  |,j                  d
          | j                   j                  |j                  d         r|<j                  |j                  d          n|<j                  |j                  d
          |<j                  | j                  d
          |<j                  |j                  d
          |<j                  |j                  d
          |9|<j                  |9       n|<j                  d       | j                   j                  |(j                  d         }=t	        j*                  |=       | j                   j                  |.j                  d         }>t	        j*                  |>       | j                   j                  |j                  d         }?t	        j*                  |?       | j                   j-                  d      }@t/        j0                  d|<|
j                  d   g|@      }A| j                   j3                  ||j                  d   |Aj                  d          | j                   j3                  |	|	j                  d   |j                  d          |Aj4                  j7                  t/        j8                  d|:      g       |Aj4                  j7                  t/        j8                  dd
      g       |Aj4                  j7                  t/        j8                  dd      g       |Aj4                  j7                  t/        j8                  dd
      g       |Aj4                  j7                  t/        j8                  d|6|7|8g      g       d |A_        | j<                  j                  |A       | j>                  | j@                  |AjB                  <   | jD                  j7                  |
|||g       | jD                  j7                  |       | jD                  j7                  |"       | jD                  j7                  |)       | jD                  j7                  |       | jD                  j7                  |(|.|g       d!| _#        y y y y )"NQuantizeLinearAddr   DequantizeLinearz=fuse_qordered_attention: failed to match input qdq nodes path)r6   MatMulReshape	Transposer8   r5   r9   )NNr   r   r   r   r   z1fuse_qordered_attention: failed to match qkv pathr   )r;   r:   r8   r5   r6   r9   )r   r   r   r   r   Nz/fuse_qordered_attention: failed to match v pathF)r8   r5   Softmaxr6   Divr8   r5   r9   )r   r   r   r   Nr   r   r   z0fuse_qordered_attention: failed to match qk path)r   r   r   r   r   Nz/fuse_qordered_attention: failed to match q pathz/fuse_qordered_attention: failed to match k path)MulSubCast	UnsqueezerA   )Nr   r   r   r   z8fuse_qordered_attention: failed to match mask_nodes path r   )inputsoutputsnamer   order_inputorder_weightorder_outputqkv_hidden_sizeszcom.microsoftT)$r   r'   r%   r&   r   check_qdq_node_for_fusion	enumerater$   outputappendr(   get_constant_valuer#   r   r+   npprodshaper   process_maskr3   transpose_2d_int8_tensorcreate_node_namer
   	make_nodereplace_node_inputr)   extendmake_attributedomainnodes_to_addthis_graph_namenode_name_to_graph_namerE   nodes_to_removeprune_graph)Br   normalize_nodeinput_name_to_nodesoutput_name_to_nodeadd_before_layernorm
start_nodedequantize_input	qkv_nodes_projection_matmulreshape_qkvtranspose_qkvdequantize_qkvquantize_qkv
matmul_qkvother_inputs_ir$   
root_inputv_nodesdequantize_v
quantize_vadd_vmatmul_vdequantize_v_matmul_weightqk_nodesdequantize_qk_softmaxquantize_qk_softmax
softmax_qkadd_qkdiv_qkdequantize_qkquantize_qk	matmul_qkq_nodesr   dequantize_q
quantize_qadd_qmatmul_qdequantize_q_matmul_weightk_nodesdequantize_k
quantize_kadd_kmatmul_kdequantize_k_matmul_weight
mask_nodesq_weightk_weightv_weightqwkwvwqw_out_sizekw_out_sizevw_out_size
mask_indexr   r   attention_inputsq_weight_tensork_weight_tensorv_weight_tensorattention_node_nameattention_nodesB                                                                     r   fusezFusionQOrderedAttention.fuseT   s   #zz;;u%F 
  +-b1J  ::77 F
 #LLXY+B/ JJ00e'
	 LLLMgpd{M><Yc 44\4::N44^TZZP ":#3#34IB//	!++A..& 5 |!!!_
 **..[!
 ?LLJK<C9A|Z 44ZL44\4::N &*ZZ%A%A(M_L`cdbe%f"%-LLJK%?%B"::(()C)I)I!)LMU 445OQUQ[Q[]bc :://	 (
 LLKL 		
! 445H$**U445JDJJW44[$**M44]DJJO **..[!
 ?LLJKDKAI|Z 44ZL44\4::N &*ZZ%A%A(M_L`cdbe%f"%-LLJK%?%B"::(()C)I)I!)LMU 445OQUQ[Q[]bc **..[!
 ?LLJK<C9A|Z 44ZL44\4::N &*ZZ%A%A(M_L`cdbe%f"%-LLJK%?%B"::(()C)I)I!)LMU 445OQUQ[Q[]bc ZZ11DFX

 LLST ::--.H.N.Nq.QR::--.H.N.Nq.QR::--.H.N.Nq.QR!!(+!!(+!!(+ggbhhqrl+ggbhhqrl+ggbhhqrl+ >>!
*x~~a/@J/NS[SaSabcSdhrSr,,99*R.:N:Nq:QRJ &*%G%G	%R"I{ !1 6 6q 9:##$4$:$:1$=>##L$6$6q$9:##L$6$6q$9:##L$6$6q$9:##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GH##$>$D$DQ$GHzz))%++a.9 ''A7 ''A7zz))%++a.9 ''A7 ''A7zz))%++a.9 ''A7 ''A7##K$5$5a$89##$7$=$=a$@A##N$8$8$;< % ''
3 ''+ #jj889S9Y9YZ[9\]O00A"jj889S9Y9YZ[9\]O00A"jj889S9Y9YZ[9\]O00A #'**"="=>Q"R#--#'$++A./(	N JJ))..:N:Nq:QSaShShijSklJJ))*;=N=T=TUV=WYgYnYnopYqr$$++V-B-B;PY-Z,[\$$++V-B-B=RS-T,UV$$++V-B-B>ST-U,VW$$++V-B-B>ST-U,VW$$++&&'9KVa;bcd %4N!$$^4@D@T@TD(()<)<=  ''m\S](^_  ''1  ''0  ''0  ''0  ''+-GIcd  $DE Ts/N*r   )__name__
__module____qualname__r   intr   r   r	   r   r3   r   __classcell__)r   s   @r   r   r      sV    SS S 	S
 &S1&y 1&U3PS8_ 1&fQ$r   r   )loggingr   typingr   numpyrO   fusion_attentionr   fusion_baser   fusion_utilsr   r   onnxr	   r
   
onnx_modelr   r   r%   r    r   r   <module>r      s7       *  1 "  	8	R$f R$r   