
    gw                        d dl mZ d dlmZmZ d dlZd dlmZ d dl	m
Z
 d dlmZmZ d dlmZmZ d dlmZ d d	lmZmZmZmZmZ d d
lmZ  ee      Z G d d      Z G d d      Z G d d      Z G d d      Z  G d d      Z! G d d      Z" G d d      Z# G d d      Z$ G d de
      Z% G d de      Z& G d d e%      Z' G d! d"e%      Z( G d# d$e%      Z) G d% d&e%      Z* G d' d(e      Z+y))    )	getLogger)ListOptionalN)DynamoOnnxHelper)Fusion)AttentionOpTypeFusionOptions) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)NumpyHelper)
ModelProto	NodeProtoTensorProtohelpernumpy_helper)	OnnxModelc                       e Zd Zd Zy)ProcessGemmWFuncc                 .    t        j                  |d      S )N   r   )np	transposeselfxs     \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_phi.py__call__zProcessGemmWFunc.__call__   s    ||Av&&    N__name__
__module____qualname__r    r   r   r   r      s    'r   r   c                       e Zd Zd Zy)ProcessMatMulQFuncc                 ^    t        j                  t        j                  |dd      d   d      S )N   r   r   r   r   splitr   s     r   r   zProcessMatMulQFunc.__call__   %    ||BHHQ1-a0&99r   Nr    r$   r   r   r&   r&          :r   r&   c                       e Zd Zd Zy)ProcessMatMulKFuncc                 ^    t        j                  t        j                  |dd      d   d      S )Nr(   r   r   r   r)   r   s     r   r   zProcessMatMulKFunc.__call__    r+   r   Nr    r$   r   r   r.   r.      r,   r   r.   c                       e Zd Zd Zy)ProcessMatMulVFuncc                 ^    t        j                  t        j                  |dd      d   d      S )Nr(   r      r   r)   r   s     r   r   zProcessMatMulVFunc.__call__%   r+   r   Nr    r$   r   r   r1   r1   $   r,   r   r1   c                       e Zd Zd Zy)ProcessBiasQFuncc                 :    t        j                  |dd      d   }|S )Nr(   r   r   r*   r   s     r   r   zProcessBiasQFunc.__call__*       HHQ2q!r   Nr    r$   r   r   r5   r5   )       r   r5   c                       e Zd Zd Zy)ProcessBiasKFuncc                 :    t        j                  |dd      d   }|S )Nr(   r7   r   r8   r   s     r   r   zProcessBiasKFunc.__call__0   r9   r   Nr    r$   r   r   r<   r<   /   r:   r   r<   c                       e Zd Zd Zy)ProcessBiasVFuncc                 :    t        j                  |dd      d   }|S )Nr(   r7   r3   r8   r   s     r   r   zProcessBiasVFunc.__call__6   r9   r   Nr    r$   r   r   r?   r?   5   r:   r   r?   c                       e Zd Zd Zy)ProcessRotCacheFuncc                 t    t        |j                        dk(  sJ |j                  d   dk(  r|d d ddf   S |S )Nr3   r       r      )lenshaper   s     r   r   zProcessRotCacheFunc.__call__<   s?    177|q   771:Q"W:r   Nr    r$   r   r   rB   rB   ;   s    r   rB   c                       e Zd Zdedee   f fdZdefdZd Z	d Z
d Zd	 Zdd
Zd Zd Zd Zdee   dedee   fdZddee   dee   defdZddee   dee   defdZd dee   dee   defdZddee   dee   defdZddee   dee   defdZd!dee   dee   defdZd!dee   dee   defdZd!dee   dee   defdZ	 	 	 	 d"dee   dee   defdZ xZS )#Fissionmodelnodes_to_findc                 (    t         |   |d|       y )NDONOTUSEsuper__init__)r   rJ   rK   	__class__s      r   rP   zFission.__init__F   s    
 	
M:r   attn_op_typec                     || _         y N)rR   )r   rR   s     r   set_attention_op_typezFission.set_attention_op_typeM   s
    (r   c                 $    |dz   t        |      z   S )N_)str)r   layer_idnames      r   	get_unamezFission.get_unameP   s    czCM))r   c                     |D ]-  }||k(  s#|j                  |      s|j                  |      s+|c S  t        d| d      )NzEdge z
 not found)endswith
startswith
ValueError)r   edgesrZ   edges       r   get_edge_by_namezFission.get_edge_by_nameS   sD    Dt|t}}T2dood6K  5j122r   c                 :    | j                  |j                  |      S rT   )rb   inputr   noderZ   s      r   get_input_by_namezFission.get_input_by_nameY   s    $$TZZ66r   c                 :    | j                  |j                  |      S rT   )rb   outputre   s      r   get_output_by_namezFission.get_output_by_name\   s    $$T[[$77r   c                    | j                   j                  |      }t        j                  |      } ||      }t	        j
                  ||dz   n|t        j                  |j                  |j                         j                         d      }| j                   j                  || j                         |j                  S )N
_processedT	data_typedimsvalsraw)rJ   get_initializerr   to_arrayr   make_tensorr   FLOATrG   flattentobytesadd_initializerthis_graph_namerZ   )r   initializer_namefunctorcustom_namei
i_np_arrayprocessed_i_np_array
new_tensors           r   process_initializerzFission.process_initializer_   s    JJ&&'78 ))!,
&z2''/:/B|+!''%++%--/779

 	

"":t/C/CDr   c                     | j                   j                         j                  j                         }||_        t
        j                  |j                  j                  _	        y rT   )
rJ   graph
value_infoaddrZ   r   ru   typetensor_type	elem_typer   rZ   new_value_infos      r   add_fp32_value_infozFission.add_fp32_value_infom   E    ))+66::<"4?4E4E''1r   c                     | j                   j                         j                  j                         }||_        t
        j                  |j                  j                  _	        y rT   )
rJ   r   r   r   rZ   r   INT64r   r   r   r   s      r   add_int64_value_infozFission.add_int64_value_infor   r   r   c                    | j                   j                         j                  D ]F  }|j                  |k(  s| j                   j                         j                  j	                  |        n t        j                  |t        j                  |      }| j                   j                         j                  j                  |g       y )Nr   rG   )
rJ   r   r   rZ   remover   make_tensor_value_infor   ru   extend)r   rZ   rG   r   r   s        r   replace_fp32_value_infozFission.replace_fp32_value_infow   s    ****,77J$&

  "--44Z@ 8  66!''

 	

%%,,n-=>r   subgraph_nodesrY   layer_known_edges_namesc                 h   |D ],  }t        |j                        D ]M  \  }}|dk(  r||vs| j                  ||      |j                  |<   | j                  |j                  |          O t        |j                        D ]M  \  }}|dk(  r||vs| j                  ||      |j                  |<   | j                  |j                  |          O | j                  ||j
                        |_        | j                  j                  |       | j                  | j                  |j
                  <   / y )N )
	enumeraterd   r[   r   ri   rZ   nodes_to_addappendry   node_name_to_graph_name)r   r   rY   r   new_noder}   rZ   s          r   set_unique_name_and_add_nodesz%Fission.set_unique_name_and_add_nodes   s
    'H$X^^442:!88(,x(FHNN1%,,X^^A->? 5 %X__542:!88)-$)GHOOA&,,X__Q-?@ 6 !NN8X]]CHM$$X.:>:N:ND((7 'r   inputsoutputsprefixc                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d      }|gS )Nr(   r   LayerNormalization_LayerNormalizationg   >)r   r   rZ   epsilonrF   r   	make_noder   r   r   r   rf   s        r   	layernormzFission.layernorm   sT    6{a7|q    //)
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|d   |d   g|dz   g|dz         }t        j                  d|dz   |d   g||d	z         }||gS )
Nr(   r   MatMulr   
matmul_outr   r   rZ   Addr3   Biasr   )r   r   r   r   matmulr   s         r   gemmzFission.gemm   s    6{a7|q   !!1Ivay)l*+("	
 \)6!95&	
 }r   c           	          t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d||      }|gS )N   r   RotaryEmbeddingcom.microsoft)r   r   rZ   domainrotary_embedding_dim	num_headsr   )r   r   r   r   rot_dimr   rf   s          r   rotaryzFission.rotary   sZ    6{a7|q   ++"!(
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d      }|gS )Nr   FastGelur   )r   r   rZ   r   r   r   s        r   fastgeluzFission.fastgelu   sS    6{a7|q   *$"
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz         }|gS )Nr3   r   r   r   r   r   s        r   r   zFission.add   sO    6{a7|q   %	
 vr   c           	          t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d|d      }|gS )N   r(   MultiHeadAttentionr   r   )r   r   rZ   r   r   unidirectionalr   r   r   r   r   r   rf   s         r   mhazFission.mha   sZ    6{a7|q    .."
 vr   c           	          t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d||      }|gS )N   r(   GroupQueryAttentionr   )r   r   rZ   r   r   kv_num_headsr   r   s         r   gqazFission.gqa   sZ    6{a7|q   !//""
 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d|ddd	      }|gS )N   r3   	Attentionr   r   rD   )r   r   rZ   r   r   r   	do_rotaryr   r   r   s         r   	attentionzFission.attention   s_    6{a7|q   +%"!#

 vr   c                     t        |      dk(  sJ t        |      dk(  sJ t        j                  d|||dz   d||||	      }|gS )N   r   PagedAttentionzvllm.ort.ext)r   r   rZ   r   r   num_kv_heads	head_sizescaler   )r   r   r   r   r   r   r   rf   s           r   
paged_attnzFission.paged_attn  sb     6{a7|q   **!"

 vr   rT   )r   )r   rD   rD   )r   rD   )r   rD   P   g   %?)r!   r"   r#   r   r   rX   rP   r   rU   r[   rb   rg   rj   r   r   r   r   r   intr   r   r   r   r   r   r   r   r   r   __classcell__rQ   s   @r   rI   rI   E   s   ;; Cy;)/ )*378F
F

?O"9oO9<OW[\_W`O(
S	 
DI 
s 
49 tCy # "T#Y c C 
tCy 
49 
c 
	$s) 	d3i 	 	$s) d3i  $s) d3i  S	 DI s ( !S	 c 	r   rI   c                   V     e Zd Zdededef fdZdefdZd Zde	fd	Z
de	fd
Z xZS )Phi2PreProcessorrJ   r   hidden_sizec                 \    t         |   |       d| _        || _        || _        d| _        y )NrD   modeling_phi_PhiModel_model_1)rO   rP   num_hidden_layersnum_attention_headsr   	func_namer   rJ   r   r   rQ   s       r   rP   zPhi2PreProcessor.__init__  s/    !##, &8r   returnc                    i }d|d<   d|d<   d|d<   d|d<   t        d	| j                  d	      D ]0  }d
| |d| <   d| |d| <   d| |d| d<   d| |d| d<   2 | j                  j                  j                  D cg c]  }|j
                   }}d|v rd|v rd|d<   d|d<   |S d|v rd|v sJ d|d<   d|d<   |S c c}w )Nlogits	lm_head_1	input_idsl_input_ids_
past_key_0
key_statespast_value_0value_statesr   	past_key_key_states_past_value_value_states_present_key_model_layers__1present_value__1_1model_layers_0_1_1model_layers_0_1_2present_key_0present_value_0model_layers_0_1)ranger   rJ   r   ri   rZ   )r   	edge_dictr}   or   s        r   get_phi2_edge_dictz#Phi2PreProcessor.get_phi2_edge_dict'  s>   	!)	+$/	.!".	,$2	.!q$00!4A-6qc?IA3'(/:1#->IaS)*1=aS/AIaS+,3A!1EIaS-.	 5 $(::#3#3#:#:;#:a166#:;7*/Cw/N.=I*+.?I*+
  &05IW5TTT,;I().?I*+ <s   Cc                     d}| j                   j                  j                  D ]7  }|j                  j	                  |      }|dk7  s$|j                  |d  |_        9 y )N)modeling_phi_PhiDecoderLayer_model_layersr7   )rJ   r   rf   op_typefind)r   phi2_transformer_layer_namerf   indexs       r   simplify_phi2_op_typez&Phi2PreProcessor.simplify_phi2_op_type=  sP    &Q#JJ$$))DLL%%&ABE{#||EF3 *r   rR   c                 
   |t         j                  k(  | _        |t         j                  k(  | _        | j
                  j                  }g }|j                  D ]A  }d|j                  v r t        j                  |j                  | j                  st        j                  nt        j                  ddg      }t        j                  dt        j                  dg      }t        j                  dt        j                  ddg      }t        j                  dt        j                  ddg      }t        j                  d	t        j                  dg      }	| j                  s|j                  |||g      n|j                  |||	g       | j                  rd
|j                  v sOt        j                  |j                  j                  d
d      |j                   j"                  j$                  dd| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       | j                  rd
|j                  v rTt        j                  |j                  |j                   j"                  j$                  g d      }
|j                  |
g       d|j                  v sWt        j                  |j                  |j                   j"                  j$                  g d      }
|j                  |
g       d
|j                  v sd|j                  v st        j                  |j                  |j                   j"                  j$                  d| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       D |j+                  d       |j                  j                  |       g }t-        |j.                        D ]F  \  }}|dk(  r|j                  |g       | j                  rd|j                  v s:t        j                  |j                  j                  dd      |j                   j"                  j$                  dd| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       | j                  rt        j                  |j                  |j                   j"                  j$                  d| j&                  d| j(                  | j&                  z  g      }
|j                  |
g       I |j+                  d       |j.                  j                  |       y )Nr   
batch_sizeseq_lenr   stepr   position_idsattention_maskinput_metadatapast_keypastr3   past_seq_len)
num_blocksr   head_size_x
block_sizeblock_x
past_value)r  r   r   r  rd   r   present_keypresenttotal_seq_lenri   )r   r   use_attnr   use_vllmrJ   r   rd   rZ   r   r   r   INT32r   r   replacer   r   r   r   r   
ClearFieldr   ri   )r   rR   r   
new_inputsvivi_iidvi_stepvi_pidvi_maskvi_metavi_cachenew_outputsr}   s                r   process_graph_ioz!Phi2PreProcessor.process_graph_ioD  s^   $(A(AA$(F(FF

  
++Bbgg%66GG7;}}k//+J[J['3
 !77)//#
  66")//'3
 !77$)//'3
 !77$)//#  == %%vw&@A#**FFG+DE}}(%<<
F;"$''"5"5"?"?( 44* ,,0H0HH
 H %%xj1(%<<"$''"5"5"?"?a H
 %%xj1277*%<<"$''"5"5"?"?	 H %%xj1(LBGG,C%<<"$''"5"5"?"?( 44* ,,0H0HH		 H %%xj1] ` 	!:&u||,EArAv""B4(==$/#)#@#@GGOOM9E&(gg&9&9&C&C ! , $ 8 8 / $ 0 0D4L4L L#
$ $**H:6]]%<<"$''"5"5"?"?( 44+ ,,0H0HH		 H  &&z2? -B 	"K(r   c                    d }| j                   j                  D ]5  }|j                  j                  | j                        s)|j                  } n |J | j                  |       | j                  | j                                | j                          | j                          |t        j                  k(  r| j                          | j                  |       y rT   )rJ   	functionsrZ   r]   r   unroll_functionupdate_edgesr   r  remove_dropout_layerr   r   remove_lm_head_layerr'  )r   rR   function_namefuncs       r   preprocess_onnxz Phi2PreProcessor.preprocess_onnx  s    JJ((Dyy!!$..1 $		 ) (((]+$1134""$!!#?999%%'l+r   )r!   r"   r#   r   r   rP   dictr   r  r   r'  r0  r   r   s   @r   r   r     sI    9j 9S 9s 9D ,4{)_ {)z,O ,r   r   c                   *     e Zd Zdef fdZd Z xZS )FissionTransformerEmbeddingPhirJ   c                 (    t         |   |dg       y )N6torch_nn_modules_sparse_Embedding_model_embed_tokens_1rN   r   rJ   rQ   s     r   rP   z'FissionTransformerEmbeddingPhi.__init__  s     	!Y Z[r   c                    t         j                  d|j                         t        |j                        dk(  sJ t        |j
                        dk(  sJ |j                  d   }|j
                  d   }| j                  |d      }|||g}t        j                  d||g|gd      g}| j                  |d|       | j                  j                  |       d	| _        y )
NOptimizing %s...r3   r   r   zembed_tokens.weightGatherEmbedding_Gatherr   T)loggerinforZ   rF   rd   ri   rg   r   r   r   nodes_to_remover   prune_graph)	r   rf   input_name_to_nodesoutput_name_to_noderd   ri   	embeddingr   r   s	            r   fusez#FissionTransformerEmbeddingPhi.fuse  s    &		24::!###4;;1$$$

1Q**41FG	#(&)"< !5)'	
 	**>1>UV##D)r   r!   r"   r#   r   rP   rB  r   r   s   @r   r3  r3    s    \\ r   r3  c                   *     e Zd Zdef fdZd Z xZS )FissionTransformerLayerNormPhirJ   c                 (    t         |   |dg       y )N@torch_nn_modules_normalization_LayerNorm_model_final_layernorm_1rN   r6  s     r   rP   z'FissionTransformerLayerNormPhi.__init__  s     	!c der   c                 F   t         j                  d|j                         t        |j                        dk(  sJ t        |j
                        dk(  sJ |j                  d   }|j
                  d   }| j                  |d      }| j                  |d      }||||g}g }	|	j                  | j                  |||g|gd             | j                  |	d|       | j                  |g d	       | j                  |g d	       | j                  j                  |       d
| _        y )Nr8  r(   r   r   zfinal_layernorm.weightzfinal_layernorm.biasFinalc   r  r	  r   T)r;  r<  rZ   rF   rd   ri   rg   r   r   r   r   r=  r   r>  )
r   rf   r?  r@  rd   ri   	ln_weightln_biasr   r   s
             r   rB  z#FissionTransformerLayerNormPhi.fuse  s   &		24::!###4;;1$$$

1Q**41IJ	((/EF#(&)W"EdnneY-H6(T[\]**>2?VW$$U,TU$$V-UV##D)r   rC  r   s   @r   rE  rE    s    ff r   rE  c                   *     e Zd Zdef fdZd Z xZS )!FissionTransformerCausalLMHeadPhirJ   c                 (    t         |   |dg       y )N(torch_nn_modules_linear_Linear_lm_head_1rN   r6  s     r   rP   z*FissionTransformerCausalLMHeadPhi.__init__  s     	!K LMr   c                 v   t         j                  d|j                         t        |j                        dk(  sJ t        |j
                        dk(  sJ |j                  d   }|j
                  d   }| j                  | j                  |d      t                     }| j                  |d      }||||g}g }	|	j                  | j                  |||g|gd             | j                  |	d	|       | j                  |g d
       | j                  |g d       | j                  j                  |       d| _        y )Nr8  r   r   r3   r   zlm_head.weightzlm_head.biasLMHead_rJ  rK  )r  r	  i   T)r;  r<  rZ   rF   rd   ri   r   rg   r   r   r   r   r   r=  r   r>  )
r   rf   r?  r@  rd   ri   	fc_weightfc_biasr   r   s
             r   rB  z&FissionTransformerCausalLMHeadPhi.fuse  s   &		24::!###4;;1$$$

1Q,,T-C-CDJZ-[]m]op	((~>#(&)W"Edii	7(CfXyYZ**>2?VW$$U,TU$$V-MN##D)r   rC  r   s   @r   rO  rO    s    NN r   rO  c                   @     e Zd Zdedef fdZd Zd Zd Zd Z	 xZ
S )FissionTransformerBlockPhirJ   r   c                     || _         d}i | _        g }t        |      D ](  }d| d}|j                  |       || j                  |<   * t        |   ||       y )NrD   *modeling_phi_PhiDecoderLayer_model_layers_r   )r   func_to_layer_idr   r   rO   rP   )r   rJ   r   max_num_layersrK   layerr   rQ   s          r   rP   z#FissionTransformerBlockPhi.__init__6  sj    
 # ">*EDUG2NI  +/4D!!), +
 	.r   c                 4    | j                   |j                     S rT   )rZ  r  )r   rf   s     r   get_layer_idz'FissionTransformerBlockPhi.get_layer_idF  s    $$T\\22r   c                    t        j                  ddgdgdt        j                        t        j                  dddgdgd	
      t        j                  dddgdgd
      t        j                  ddgdgdt        j                        t        j                  ddgdgd
      t        j                  dddgdgdd      t        j                  ddgdgdt        j                        g}|S )NCastr  
mask_int64Cast_gqa_aux_0)r   r   rZ   to	ReduceSumonemask_row_sumsReduceSum_gqa_auxr   Subseqlens_k_int64Sub_gqa_aux	seqlens_kCast_gqa_aux_1Shape
mask_shapeShape_gqa_aux_0r9  total_seq_len_int64Gather_gqa_aux_0r   )r   r   rZ   axistotal_sequence_lengthCast_gqa_aux_2)r   r   r   r   r  )r   gqa_aux_nodess     r   get_gqa_aux_nodesz,FissionTransformerBlockPhi.get_gqa_aux_nodesI  s   ()%%$$ $e,()(	 '/*+"	 )*$%$$ Wl^l^Zkl$e,./' -.01%$$G*
V r   c	                 n   | j                   j                  |      }	| j                   j                  |      }
| j                   j                  |      }t        j                  t	        j
                  |	      d      }t        j                  t	        j
                  |
      d      }t        j                  t	        j
                  |      d      }t        j                  |||fd      }| j                   j                  |      }| j                   j                  |      }| j                   j                  |      }t	        j
                  |      }t	        j
                  |      }t	        j
                  |      }t        j                  |||fd      }|j                  d   }t        j                  |t        j                  ||dz  g|j                         j                         d      }| j                   j                  || j                         t        j                  |t        j                  |dz  g|j                         j                         d      }| j                   j                  || j                         | j!                  |j"                         | j!                  |j"                         ||fS )Nr   r   )rr  r   r(   Trm   )rJ   rr   r   r   r   rs   stackrG   r   rt   r   ru   rv   rw   rx   ry   r   rZ   )r   q_wk_wv_wq_bk_bv_bweight_name	bias_nameq_weightk_weightv_weightqwkwvw
qkv_weightq_biask_biasv_biasqbkbvbqkv_biasr   weightbiass                             r   pack_qkv_gemmz(FissionTransformerBlockPhi.pack_qkv_gemmw  s   ::--c2::--c2::--c2\\+..x8&A\\+..x8&A\\+..x8&AXXr2rl3
++C0++C0++C0!!&)!!&)!!&)88RRLq1 &&q)##!''{Q/##%--/
 	

""64+?+?@!!!''/"!!#++-
 	

""4)=)=>  -  +I%%r   c                    t         j                  d|j                         t         j                  d| j                          | j	                  |      }|j
                  d   }| j                  |d      }| j                  |d      }|j                  d   }| j                  |d      }	| j                  |d      }
| j                  |d	      }| j                  |d
      }d\  }}}}}}d\  }}d\  }}| j                  t        j                  k7  r	| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }| j                  |d      }| j                  |d      }| j                  |d      }| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }n| j                  | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d      | j                  |d            \  }}| j                  | j                  |d      t                     }| j                  |d      }| j                  | j                  |d      t                     }| j                  | j                  |d      t                     }| j                  |d      }| j                  |d      }g }|j!                  |||g       |j!                  ||	|
g       |j!                  ||g       | j                  t        j                  k7  r|j!                  ||||||||g       n|j!                  ||g       |j!                  ||||||g       |j!                  g d       g }|j!                  | j#                  |||gdg             |j!                  | j%                  d||gd gd!             |j!                  | j%                  d||gd"gd#             |j!                  | j'                  d"gd$g             |j!                  | j%                  d$||gd%gd&             |j!                  | j)                  d d%gd'gd(             |j!                  | j)                  |d'g|gd)             | j                  t        j                  k7  rg|j!                  | j%                  d||gd*gd+             |j!                  | j%                  d||gd,gd-             |j!                  | j%                  d||gd.gd/             | j                  t        j*                  k(  rd0nd1}|j!                  | j-                  d*|||gd2gd+             |j!                  | j-                  d,|||gd3gd-             | j                  t        j.                  k(  r.|j!                  | j1                  d2d3d.d4d5d4||gd|	|
g             n| j                  t        j2                  k(  r|j!                  | j5                  d2d3d.||d6d7gd|	|
g             |dk(  r2| j7                         } | D ]@  }!| j8                  j;                  |!       | j<                  | j>                  |!j                  <   B | j@                  jC                  tE        jF                  tI        jJ                  d8gd9:      d;<      | j<                         n| j                  t        j*                  k(  rn|j!                  | jM                  d2d3d.||d=gdg             nEd>| }"d?| }#|j!                  |"|#g       |j!                  | jO                  d||d5|"gd|#g             | jQ                  |||       | jS                  |g d@       | jS                  |g d@       | jT                  j;                  |       dA| _+        y )BNr8  zAttentionOpType: r   r  r  r7   r  present_valuezinput_layernorm.weightzinput_layernorm.bias)NNNNNN)NNzself_attn.q_proj.weightzself_attn.k_proj.weightzself_attn.v_proj.weightzself_attn.q_proj.biaszself_attn.k_proj.biaszself_attn.v_proj.biaszrotary_emb.cos_cachedzrotary_emb.sin_cachedattn_qkv_weightattn_qkv_biaszself_attn.dense.weightzself_attn.dense.biaszmlp.fc1.weightzmlp.fc2.weightzmlp.fc1.biaszmlp.fc2.bias)r  r
  rk  rs  r  r  ln_outattn_outattn_add_outOutProj_fc1_outFC1_gelu_outfc2_outFC2_residual_1_out
Residual_1
Residual_2queryQ_keyK_valueV_r  r
  	query_rotkey_rotr   r  rk  rs  r   int64)dtypere  )rZ   r  past_present_rK  T),r;  r<  rZ   rR   r^  rd   rg   ri   rj   r   r   r   r   rB   r  r[   r   r   r   r   r   r   r   r   r   r   r   rv  r   r   ry   r   rJ   rx   r   
from_arrayr   arrayr   r   r   r   r=  r>  )$r   rf   r?  r@  rY   i_hidden_statesi_key_cachei_value_cacheo_hidden_stateso_key_cacheo_value_cacherL  rM  attn_q_weightattn_q_biasattn_k_weightattn_k_biasattn_v_weightattn_v_biasr  r  	cos_cache	sin_cacheattn_out_weightattn_out_biasmlp_fc1_weightmlp_fc2_weightmlp_fc1_biasmlp_fc2_biasr   r   pos_ids_nameru  r   	past_namepresent_names$                                       r   rB  zFissionTransformerBlockPhi.fuse  sA    	&		2'(9(9':;<$$T***Q-,,T:>..t\B++b/--dMB//oF**41IJ	((/EF^
Z{M;{ *4&)	9 9 99 44&&t-FGIYI[M !44&&t-FGIYI[M !44&&t-FGIYI[M 007NOK007NOK007NOK00&&t-DEGZG\I 00&&t-DEGZG\I .2-?-?&&t-FG&&t-FG&&t-FG&&t-DE&&t-DE&&t-DEx):;x9	.*O] 22""4)ABDTDV
 ..t5KL11$2H2HO_2`brbtu11$2H2HO_2`brbtu--dNC--dNC"$&&m'TU&&m'TU&&	7';< 9 99#**!!!	 $**O]+KL&&m^\>[gh	
 	 &&n	
 dnnoy'-RU]T^_`dii_m(TWeVfhrstdii><(PS\R]_efgdmmYK*FGdii^\(RU^T_aghidhh	'BEUDVXdefdhh9I'J_L]_klm 9 99!!$))X}k,RU\T]_c"de!!$))X}k,RUZT[]a"bc!!$))X}k,RU\T]_c"de-1->->/B`B`-`>flL!!$++wiQZ.[^i]jlp"qr!!$++ulIy.Y\e[fhl"mn  O$F$FF%%HH$i">NPRT_ano#[-@ ""o&I&II%%HH'%#')'3 $[-@ q=$($:$:$<M$1))00:FJFZFZ44X]]C %2 JJ..$//!G0LSXY[_[o[o ""o&D&DD%%OO$i+}Vfg#  z*I%hZ0L#**I|+DE!!?OQZ[^hjv]w 	**>8E\]$$_6^_$$_6^_##D)r   )r!   r"   r#   r   r   rP   r^  rv  r  rB  r   r   s   @r   rW  rW  5  s1    // / 3,\(&Tk r   rW  c                   V     e Zd Zdededef fdZd
dee   def fdZ	d Z
dd	Z xZS )PhiOnnxModelrJ   r   r   c                     t         |   |       t        | j                  ||      | _        t        | |      | _        t        |       | _        t        |       | _
        t        |       | _        y rT   )rO   rP   r   rJ   phi2_preprocessorrW  fission_transformer_blockrO  fission_causal_lm_headrE  fission_transformer_layernormr3  fission_transformer_embeddingr   s       r   rP   zPhiOnnxModel.__init__P  s[    !1$**i!U)CD))T&&G&M#-KD-Q*-KD-Q*r   optionsadd_dynamic_axesc                 &   |J |j                   }| j                  j                  |       | j                  j	                  |       | j                  j                          | j                  j                          | j                  j                          | j                  j                          t        | )          t        |       | _        t        |       | _        | j                  j                          | j                  j                          y rT   )attention_op_typer  rU   r  r0  applyr  r  r  rO   r>  r   fuse_slnr
   fuse_bias_sln)r   r  r  rR   rQ   s       r   optimizezPhiOnnxModel.optimizeX  s    """00&&<<\J..|<&&,,.**002##))+**002 5T:=dC  "r   c                     i }g d}|D ]!  }| j                  |      }t        |      ||<   # t        j                  d|        |S )z8
        Returns node count of fused operators.
        )	r   r   r   r   GeluBiasGelur   r   SkipLayerNormalizationzOptimized operators: )get_nodes_by_op_typerF   r;  r<  )r   op_countopsopnodess        r   get_fused_operator_statisticsz*PhiOnnxModel.get_fused_operator_statisticsm  sT     

 B--b1Eu:HRL  	+H:67r   c                    | j                         dt        ffd} |d       |d      z    |d      z    |d      z   } |d       |d      z    |d	      z   } |d
       |d      z   }|dkD  xr ||k(  xr ||k\  }|dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j	                  d       |S )zA
        Returns True when the model is fully optimized.
        op_namec                 .    j                  |       xs dS )Nr   )get)r  fused_op_counts    r   r  z1PhiOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.3!3r   r   r   r   r   r  r  r   r   r  r   zLayer Normalization not fusedzGelu (or FastGelu) not fusedz+Attention (or MultiHeadAttention) not fused)r  rX   r;  debugwarning)r   r  r  r   gelu
layer_norm
is_perfects    `     r   is_fully_optimizedzPhiOnnxModel.is_fully_optimized  s     !!??AN	4c 	4 [!+,-,-. '() 	 (:"66*9MM23h?W6XX
!mZ)t*;Z*PYBY
?LL8919LL78>NNHIr   )NFrT   )r!   r"   r#   r   r   rP   r   r	   boolr  r  r  r   r   s   @r   r  r  O  sD    Rj RS Rs R# 7 #RV #*.r   r  ),loggingr   typingr   r   numpyr   dynamo_onnx_helperr   fusion_baser   fusion_optionsr   r	   fusion_skiplayernormr
   r   fusion_utilsr   onnxr   r   r   r   r   
onnx_modelr   r!   r;  r   r&   r.   r1   r5   r<   r?   rB   rI   r   r3  rE  rO  rW  r  r$   r   r   <module>r     s     !  /  9 _ $ I I  	8	' '
: :
: :
: :
    Vf Vrp,' p,f W  D W  B   BW  W tS9 Sr   