
    gL                        d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZmZ d dlmZ d dl m!Z! d dl"m#Z# d dl$m%Z% d dl&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z5 d dl6m7Z7m8Z8m9Z9 d dl:m;Z;  ee<      Z= G d de;      Z>y)    )	getLogger)ListOptional)PackingMode)AttentionMaskFusionAttention)FusionBartAttention)FusionBiasGelu)FusionEmbedLayerNormalization)FusionFastGelu)
FusionGelu)FusionGeluApproximation)FusionGemmFastGelu)FusionLayerNormalizationFusionLayerNormalizationTF)AttentionMaskFormatFusionOptions)FusionQOrderedAttention)FusionQOrderedGelu) FusionQOrderedLayerNormalization)FusionQOrderedMatMul)FusionQuickGelu)FusionReshape)FusionRotaryEmbeddings)FusionShape)"FusionSimplifiedLayerNormalization&FusionSkipSimplifiedLayerNormalization) FusionBiasSkipLayerNormalizationFusionSkipLayerNormalization)FusionUtils)
ModelProtoTensorProtohelper)	OnnxModelc                   
    e Zd Zd&dededef fdZd Zd Zd Zd Z	d	 Z
d
 Zd Zd Zd Zd Zd Zd'dZd Zd Zd Zdedee   defdZdefdZd Zd(dZd Zd Zd Zd Zd)de e!   d efd!Z"d" Z#d*d#Z$d+d$efd%Z% xZ&S ),BertOnnxModelmodel	num_headshidden_sizec                 v   |dk(  r|dk(  s|dkD  r||z  dk(  sJ t         |   |       || _        || _        t	        |       | _        t        | | j                  | j                  | j
                        | _        t        | | j                  | j                  | j
                        | _	        t        |       | _        y)aG  Initialize BERT ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   N)super__init__r(   r)   r   attention_maskr   attention_fusionr   qordered_attention_fusionr    utils)selfr'   r(   r)   	__class__s       ]/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_bert.pyr,   zBertOnnxModel.__init__&   s     Q;!#3Q;YbKbfgKghh"&+D1 /d6F6FX\XkXk l)@$""DNND4G4G*
& !&
    c                 l    | j                   j                          | j                  j                          y N)r.   applyr/   r1   s    r3   fuse_attentionzBertOnnxModel.fuse_attention;   s&    ##%&&,,.r4   c                     t        |       }|j                          t        |       }|j                          t        |       }|j                          t	        |       }|j                          y r6   )r   r7   r   r   r   r1   fusions     r3   	fuse_geluzBertOnnxModel.fuse_gelu@   sN    D!% &#D)r4   c                 <    t        | |      }|j                          y r6   )r
   r7   )r1   is_fastgelur<   s      r3   fuse_bias_geluzBertOnnxModel.fuse_bias_geluK   s    k2r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   gelu_approximationz BertOnnxModel.gelu_approximationO   s    (.r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   fuse_gemm_fast_geluz!BertOnnxModel.fuse_gemm_fast_geluS   s    #D)r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   fuse_add_bias_skip_layer_normz+BertOnnxModel.fuse_add_bias_skip_layer_normW   s    1$7r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   fuse_reshapezBertOnnxModel.fuse_reshape[   s    t$r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   
fuse_shapezBertOnnxModel.fuse_shape_   s    T"r4   c                 <    t        | |      }|j                          y r6   )r   r7   )r1   use_mask_indexr<   s      r3   fuse_embed_layerzBertOnnxModel.fuse_embed_layerc   s    .t^Dr4   c                     t        |       }|j                          t        |       }|j                          t        |       }|j                          y r6   )r   r7   r   r   r;   s     r3   fuse_layer_normzBertOnnxModel.fuse_layer_normg   s=    )$/+D1 2$7r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   fuse_simplified_layer_normz(BertOnnxModel.fuse_simplified_layer_normr   s    3D9r4   c                 >    t        | |      }|j                          y )N)shape_infer)r   r7   )r1   rS   r<   s      r3   fuse_skip_layer_normz"BertOnnxModel.fuse_skip_layer_normv   s    -dLr4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   fuse_skip_simplified_layer_normz-BertOnnxModel.fuse_skip_simplified_layer_normz   s    7=r4   c                    t        |       }|j                          t        t        d | j                  j
                  j                              }t        t        d |            }d}|t        | j                  j                        k  r| j                  j                  |   }d|j                  v r4|j                  |vr&| j                  j                  j                  |       n|dz  }|t        | j                  j                        k  ry y )Nc                 B    | j                   dk(  xr | j                  dk7  S )NRotaryEmbeddingcom.microsoft)op_typedomainnodes    r3   <lambda>z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s     T\\->>a4;;RaCaar4   c                     | j                   S r6   )r\   r]   s    r3   r_   z6BertOnnxModel.fuse_rotary_embeddings.<locals>.<lambda>   s    dkkr4   r   rY      )r   r7   listfilterr'   graphr^   setmaplen	functionsnamer\   remove)r1   r<   rot_emb_nodesnon_ms_domains_to_keepifns         r3   fuse_rotary_embeddingsz$BertOnnxModel.fuse_rotary_embeddings~   s    '-a

  %%
 "%S)A=%Q!R#djj**++%%a(B BGG+		AW0W

$$++B/Q #djj**++r4   c                 :    t        |       }|j                          y r6   )r   r7   r;   s     r3   fuse_qordered_mamtulz"BertOnnxModel.fuse_qordered_mamtul   s    %d+r4   r[   input_indicescastedc                    g }| j                         }| j                  |      }|D ]  }|D cg c]*  }|t        |j                        k  s|j                  |   , }	}|	D ]  }
| j	                  |
      r|r|j                  |
       )|
|v s.||
   }|j                  dk(  sC| j	                  |j                  d         b|se|j                  |j                  d            |S c c}w )z
        Get graph inputs that feed into node type (like EmbedLayerNormalization or Attention).
        Returns a list of the graph input names based on the filter whether it is casted or not.
        Castr   )output_name_to_nodeget_nodes_by_op_typerg   inputfind_graph_inputappendr[   )r1   r[   rr   rs   graph_inputsrv   nodesr^   rm   bert_inputs
bert_inputparents               r3   get_graph_inputs_from_node_typez-BertOnnxModel.get_graph_inputs_from_node_type   s    
 "668))'2D2?W-Q1s4::CV4::a=-KW)
((4!$++J7#660<F~~/D4I4I&,,WX/4Z4f!(//Q@ *   Xs   C)C)c                 ^    | j                  dg d|      }|| j                  ddg|      z  }|S )NEmbedLayerNormalization)r   ra      	Attention   )r   )r1   rs   inputss      r3   !get_graph_inputs_from_fused_nodesz/BertOnnxModel.get_graph_inputs_from_fused_nodes   s9    556OQZ\bc$66{QCPPr4   c                     | j                         }d}d}|j                  D ]:  }| j                  |t        j                        \  }}|r|dz  }|t        |      z  }< t        j                  d| d| d       y)zPChange data type of all graph inputs to int32 type, and add Cast node if needed.r   ra   z)Graph inputs are changed to int32. Added z Cast nodes, and removed z Cast nodes.N)rd   rx   change_graph_input_typer"   INT32rg   loggerinfo)r1   rd   add_cast_countremove_cast_countgraph_inputnew_noderemoved_nodess          r3   change_graph_inputs_to_int32z*BertOnnxModel.change_graph_inputs_to_int32   s    

 ;;K&*&B&B;P[PaPa&b#Hm!#]!33	 '
 	77GG`ar`ss  A	
r4   c                 >   | j                  d      | j                  d      z   }| j                  j                  j                  D ]|  }|j                  |v s|j
                  j                  j                  j                  d   }||_	        |I|j
                  j                  j                  j                  d   }||_	        ~ | j                  j                  j                  D ]6  }|j
                  j                  j                  j                  d   }||_	        8 y)zD
        Update input and output shape to use dynamic axes.
        T)rs   Fr   Nra   )r   r'   rd   rx   ri   typetensor_typeshapedim	dim_paramoutput)r1   dynamic_batch_dimdynamic_seq_lenbert_graph_inputsrx   	dim_protor   s          r3   use_dynamic_axeszBertOnnxModel.use_dynamic_axes   s     !BB C 
22%2@A ZZ%%++Ezz..!JJ2288<<Q?	&7	#". %

 6 6 < < @ @ CI*9I' , jj&&--F//5599!<I"3I .r4   c                 $    | j                          y r6   )adjust_reshape_and_expandr8   s    r3   
preprocesszBertOnnxModel.preprocess   s    &&(r4   c                 "   g }| j                         D ]D  }|j                  dk(  s| j                  |j                  d         }|N|j                  dk(  r?|j                  |g       | j                  |j                  d   |j                  d          | j                  |g dg d| j                               }||d   }| j                  |j                  d         }|d   }| j                  |j                  d         }|d   }	||t        |      d	k(  st        |      dk(  s|d   |d   k(  s)|	j                  d   |j                  d<   G |r3| j                  |       t        j                  d
t        |              y y )NReshapera   r   )Expandr   r   Slice)r   r   r   r      z"Removed Reshape and Expand count: )r|   r[   get_constant_valuerx   sizeextendreplace_input_of_all_nodesr   match_parent_pathrv   rg   remove_nodesr   r   )
r1   nodes_to_remover^   reshape_shapereshape_pathexpand_nodeexpand_shape_valuereshape_before_expandshape_value
slice_nodes
             r3   r   z'BertOnnxModel.adjust_reshape_and_expand   s   JJLD||y( !% 7 7

1 F ,1C1Cq1H#**D6233DKKNDJJqMR  $55< ,,.	   +".r"2K)-)@)@ARARSTAU)V&,8,<)"&"9"9:O:U:UVW:X"YK!-b!1J*6'3 23q8,1.q1[^C(2(9(9!(<

1C !F o.KK<S=Q<RST r4   c                 @   | j                         }g }| j                         D ]  }dddd}|j                  |v r||j                     }| j                  |g d|dddddg|      }|l|\  }}}	}
}}|j                  d   | j                         j                  d   j                  k(  r,|j                  d   |j                  d<   | j                         }|j                  dk(  s| j                  |g dg d|      }||d	   j                  d   | j                         j                  d   j                  k(  st        j                  d|j                  dt        |j                        dz
   |j                  |j                  d
z         }d|_        |j                  j                  t        j                  d| j                        g       | j!                  || j#                  |      j                         |j%                  |        | j'                  |       y )Nra   r   r   )r   	ReduceSumr   )ru   ConstantOfShapeConcat	UnsqueezeGatherShaper   )r   ru   r   r   )r   r   r   r   r   _remove_mask)r   outputsri   rZ   r(   )rv   r|   r[   r   rx   rd   ri   r   r#   	make_noderg   r\   	attributer   make_attributer(   add_nodeget_graph_by_noderz   r   )r1   rv   r   r^   op_input_idrm   parent_nodescastconstantOfShapeconcat	unsqueezegatherr   attention_nodes                 r3   clean_graphzBertOnnxModel.clean_graph   s   "668JJLD 78aVWXK||{*-#55 1aA&'   + %'!{{1~););A)>)C)CC38<<?--a0.2.F.F.H+||{*
  $55E '	   +#B'--a0DJJL4F4Fq4I4N4NN)/)9)9'#'::a#djj/A2E#F$(KK!%^!;	* 1@-&00779N9N{\`\j\j9k8lmnd6L6L^6\6a6ab'..t4y !z 	/*r4   c                 D    | j                          | j                          y r6   )r   prune_graphr8   s    r3   postprocesszBertOnnxModel.postprocessB  s    r4   optionsadd_dynamic_axesc                 L   ||j                   s| j                          | j                  j                          | j                  j	                          ||j
                  r | j                          | j                          ||j                  r| j                          | j                          | j                          ||j                  r+| j                  |j                          | j                          ||j                  r| j!                          || j"                  j%                  |j&                         |j(                  rVt+        | j,                  t.              s<t1        | | j2                  | j4                  | j"                  |j(                        | _        ||j6                  r| j9                          ||j:                  r| j=                          | j?                          ||j@                  r.|j&                  tB        jD                  k(  }| jG                  |       | j                  jI                          | jK                          ||jL                  r$| jO                  d       | jO                  d       ||jP                  r| jS                          ||jT                  r| jW                          ||jX                  r| j[                          | j]                          |r| j_                          t`        jc                  d| je                                 y )NT)r?   Fzopset version: )3enable_shape_inferencedisable_shape_inferencer0   remove_identity_nodesremove_useless_cast_nodesenable_layer_normrO   rQ   enable_gelur=   r   rH   enable_skip_layer_normrT   rV   enable_rotary_embeddingsro   r-   set_mask_formatattention_mask_formatuse_multi_head_attention
isinstancer.   r	   r   r)   r(   enable_attentionr9   enable_qordered_matmulrq   rJ   enable_embed_layer_normr   MaskIndexEndrM   remove_useless_reshape_nodesr   enable_bias_gelur@   enable_bias_skip_layer_normrF   enable_gelu_approximationrB   enable_gemm_fast_gelurD   remove_unused_constantr   r   r   get_opset_version)r1   r   r   rL   s       r3   optimizezBertOnnxModel.optimizeF  s   )G)G((*

((* 	

,,.O 9 9  "++-O 3 3NNO > >%%g&D&DE002O @ @'')//0M0MN//
4CXCXZm8n(7$$NN''44)% O 8 8! O > >%%'O ? ?$::>Q>^>^^N!!.1 	

//1 O 8 8D1E2O C C..07#D#D##%7#@#@$$&##% !!#od&<&<&>%?@Ar4   c                     i }g d}g d}||z   D ]!  }| j                  |      }t        |      ||<   # t        j                  d|        |S )z8
        Returns node count of fused operators.
        )r   r   MultiHeadAttentionGeluFastGeluBiasGeluGemmFastGeluLayerNormalizationSimplifiedLayerNormalizationSkipLayerNormalization SkipSimplifiedLayerNormalizationrY   )QOrderedAttentionQOrderedGeluQOrderedLayerNormalizationQOrderedMatMulzOptimized operators: )rw   rg   r   r   )r1   op_countopsq_opsopr|   s         r3   get_fused_operator_statisticsz+BertOnnxModel.get_fused_operator_statistics  s_     

 +B--b1Eu:HRL  	+H:67r4   c                 R   | j                         dt        ffd} |d      } |d       |d      z    |d      z   } |d       |d      z    |d	      z   } |d
       |d      z   } |d       |d      z   }|dkD  xr  |dkD  xr ||k(  xr |d|z  k\  xs |d|z  k\  }|dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j                  d       |dk(  rt        j	                  d       |S )zA
        Returns True when the model is fully optimized.
        op_namec                 .    j                  |       xs dS )Nr   )get)r  fused_op_counts    r3   r   z2BertOnnxModel.is_fully_optimized.<locals>.op_count  s    !%%g.3!3r4   r   r   r   r   r   r   r   r   r   r   r   r   r   zLayer Normalization not fusedz$Simple Layer Normalization not fusedzGelu (or FastGelu) not fusedz!EmbedLayerNormalization not fusedz+Attention (or MultiHeadAttention) not fused)r   strr   debugwarning)	r1   r  r   embed	attentiongelu
layer_normsimple_layer_norm
is_perfects	    `       r3   is_fully_optimizedz BertOnnxModel.is_fully_optimized  sV    !!??AN	4c 	4 23[)H5I,JJXViMjj	(:"66*9MM23h?W6XX
$%CDxPrGss QY XQXd"X I-V3DI3U	 	 ?LL89!LL?@19LL78A:LL<=>NNHIr4   use_symbolic_shape_inferc                 <    t        |       }|j                  |       y r6   )r   convert)r1   r  packing_modes      r3   convert_to_packing_modez%BertOnnxModel.convert_to_packing_mode  s    "4(56r4   )r   r   )T)
batch_sizemax_seq_len)NFr6   )F)'__name__
__module____qualname__r!   intr,   r9   r=   r@   rB   rD   rF   rH   rJ   rM   rO   rQ   rT   rV   ro   rq   r  r   boolr   r   r   r   r   r   r   r   r   r   r   r   r  r  __classcell__)r2   s   @r3   r&   r&   %   s    'j 'S '3 '*/
		(s 4PS9 ^b , 

4('UR@+DOB 7 OBRV OBb@&P7 7r4   r&   N)?loggingr   typingr   r   r  r   fusion_attentionr   r   fusion_bart_attentionr	   fusion_biasgelur
   fusion_embedlayerr   fusion_fastgelur   fusion_gelur   fusion_gelu_approximationr   fusion_gemmfastgelur   fusion_layernormr   r   fusion_optionsr   r   fusion_qordered_attentionr   fusion_qordered_gelur   fusion_qordered_layernormr   fusion_qordered_matmulr   fusion_quickgelur   fusion_reshaper   fusion_rotary_attentionr   fusion_shaper   fusion_simplified_layernormr   r   fusion_skiplayernormr   r   fusion_utilsr    onnxr!   r"   r#   
onnx_modelr$   r  r   r&    r4   r3   <module>r6     sp     ! / ; 5 * ; * " = 2 Q = = 3 F 7 , ( : $ r _ $ 0 0  	8	|7I |7r4   