
    gL0                     Z    d dl mZ d dlmZ  G d d      Z G d de      Z G d d      Zy	)
    )ArgumentParser)Enumc                       e Zd ZdZdZdZdZy)AttentionMaskFormatr            N)__name__
__module____qualname__MaskIndexEndMaskIndexEndAndStartAttentionMaskNoMask     \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_options.pyr   r   	   s    L  M Fr   r   c                   .    e Zd ZdZdZdZdZd Zd Zd Z	y)	AttentionOpType	AttentionMultiHeadAttentionGroupQueryAttentionPagedAttentionc                     | j                   S Nvalueselfs    r   __str__zAttentionOpType.__str__   s    zzr   c                 ,    t        | j                        S r   )hashr   r   s    r   __hash__zAttentionOpType.__hash__!   s    DJJr   c                 4    |j                   | j                   k(  S r   r   )r   others     r   __eq__zAttentionOpType.__eq__$   s    {{djj((r   N)
r
   r   r   r   r   r   r   r    r#   r&   r   r   r   r   r      s'    I-/%N )r   r   c                   V    e Zd ZdZd ZddZd ZdefdZe	d        Z
e	defd	       Zy
)FusionOptionsz'Options of fusion in graph optimizationc                    d| _         d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _        d| _	        d| _
        d| _        d| _        d| _        d| _        |dk(  rd| _        t        j                   | _        |dk(  rt        j$                  | _        n|dk(  rt        j&                  | _        d | _        |dv r2d| _        d| _        d| _        d| _        d| _        d| _        d| _        y y )NTFclipbertvitunetvaer*   )enable_geluenable_layer_normenable_attentionenable_rotary_embeddingsuse_multi_head_attention!disable_multi_head_attention_biasenable_skip_layer_normenable_embed_layer_normenable_bias_skip_layer_normenable_bias_geluenable_gelu_approximationenable_qordered_matmulenable_shape_inferenceenable_gemm_fast_gelugroup_norm_channels_lastr   r   attention_mask_formatr   r   attention_op_typeenable_nhwc_convenable_group_normenable_skip_group_normenable_bias_splitgeluenable_packed_qkvenable_packed_kvenable_bias_add)r   
model_types     r   __init__zFusionOptions.__init__+   s   !% $(,% ).%16.&*#'+$+/( $).&&*#&*#%*"(,%+0D( &9%F%F")<)I)ID&5 )<)C)CD&!% 00$(D!%)D"*.D')-D&%)D"$(D!#'D  1r   c                 ^    |rt         j                  | _        y t         j                  | _        y r   )r   r   r?   r   )r   use_raw_masks     r   use_raw_attention_maskz$FusionOptions.use_raw_attention_maskZ   s!    )<)J)JD&)<)I)ID&r   c                 .    t         j                  | _        y r   )r   r   r?   r   s    r   disable_attention_maskz$FusionOptions.disable_attention_mask`   s    %8%?%?"r   attn_op_typec                     || _         y r   )r@   )r   rO   s     r   set_attention_op_typez#FusionOptions.set_attention_op_typec   s
    !-r   c                    t        | j                        }| j                  rd|_        | j                  rd|_        | j                  rd|_        | j                  rd|_	        | j                  rd|_
        | j                  rd|_        | j                  rd|_        | j                  rd|_        | j"                  rd|_        | j&                  rd|_        | j(                  rd|_        | j,                  rd|_        | j.                  r|j1                  d       | j0                  r|j1                  d       | j2                  r|j5                          | j                  dv r| j6                  rd|_        | j:                  rd|_        | j>                  rd|_         | jB                  rd|_"        | jF                  rd|_$        | jJ                  rd|_&        | jN                  rd|_(        | jR                  rd|_*        |S )NFTr-   )+r(   rH   disable_gelur0   disable_layer_normr1   disable_rotary_embeddingsr3   disable_attentionr2   r4   disable_skip_layer_normr6   disable_embed_layer_normr7   disable_bias_skip_layer_normr8   disable_bias_gelur9   r:   disable_shape_inferencer<   r=   use_mask_indexrL   no_attention_maskrN   use_group_norm_channels_firstr>   disable_nhwc_convrA   disable_group_normrB   disable_skip_group_normrC   disable_bias_splitgelurD   disable_packed_qkvrE   disable_packed_kvrF   disable_bias_addrG   )argsoptionss     r   parsezFusionOptions.parsef   s   0"'G""(-G%))/4G,!!',G$((/3G,''-2G*((.3G+,,27G/!!',G$))04G-''-2G*%%,0G)**51&&**40!!**,??5511380%%+0(&&,1)++16.**05-&&,1)%%+0($$*/'r   parserc                    | j                  dddd       | j                  d       | j                  dddd       | j                  d	       | j                  d
ddd       | j                  d       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  dddd        | j                  d!       | j                  d"ddd#       | j                  d$       | j                  d%ddd&       | j                  d'       | j                  d(ddd)       | j                  d*       | j                  d+ddd,       | j                  d-       | j                  d.ddd/       | j                  d0       | j                  d1ddd2       | j                  d3       | j                  d4ddd5       | j                  d6       | j                  d7ddd8       | j                  d9       | j                  d:ddd;       | j                  d<       | j                  d=ddd>       | j                  d?       | j                  d@dddA       | j                  dB       | j                  dCdddD       | j                  dE       | j                  dFdddG       y )HNz--disable_attentionF
store_truezdisable Attention fusion)requiredactionhelp)rV   z--disable_skip_layer_normz%disable SkipLayerNormalization fusion)rW   z--disable_embed_layer_normz&disable EmbedLayerNormalization fusion)rX   z--disable_bias_skip_layer_normz2disable Add Bias and SkipLayerNormalization fusion)rY   z--disable_bias_geluz)disable Add Bias and Gelu/FastGelu fusion)rZ   z--disable_layer_normz!disable LayerNormalization fusion)rT   z--disable_geluzdisable Gelu fusion)rS   z--enable_gelu_approximationz+enable Gelu/BiasGelu to FastGelu conversion)r:   z--disable_shape_inferencez disable symbolic shape inference)r[   z--enable_gemm_fast_geluzenable GemmfastGelu fusion)r=   z--use_mask_indexzWuse mask index to activate fused attention to speed up. It requires right-side padding!)r\   z--use_raw_attention_maskzuse raw attention mask. Use this option if your input is not right-side padding. This might deactivate fused attention and get worse performance.)rL   z--no_attention_maskz1no attention mask. Only works for model_type=bert)r]   z--use_multi_head_attentionzUse MultiHeadAttention instead of Attention operator for testing purpose. Note that MultiHeadAttention might be slower than Attention when qkv are not packed. )r4   z--disable_group_normz9not fuse GroupNorm. Only works for model_type=unet or vae)r`   z--disable_skip_group_normzPnot fuse Add + GroupNorm to SkipGroupNorm. Only works for model_type=unet or vae)ra   z--disable_packed_kvz[not use packed kv for cross attention in MultiHeadAttention. Only works for model_type=unet)rd   z--disable_packed_qkvz[not use packed qkv for self attention in MultiHeadAttention. Only works for model_type=unet)rc   z--disable_bias_addz0not fuse BiasAdd. Only works for model_type=unet)re   z--disable_bias_splitgeluz6not fuse BiasSplitGelu. Only works for model_type=unet)rb   z--disable_nhwc_convz:Do not use NhwcConv. Only works for model_type=unet or vae)r_   z--use_group_norm_channels_firstznUse channels_first (NCHW) instead of channels_last (NHWC) for GroupNorm. Only works for model_type=unet or vae)r^   z--disable_rotary_embeddingsz5Do not fuse rotary embeddings into RotaryEmbedding op)add_argumentset_defaults)ri   s    r   add_argumentszFusionOptions.add_arguments   sv   !+	 	 	
 	e4'8	 	 	
 	E:(9	 	 	
 	U;,E	 	 	
 	?!<	 	 	
 	e4"4	 	 	
 	u5&	 	 	
 	/)>	 	 	
 	e<'3	 	 	
 	E:%-	 	 	
 	%8j	 	 	
 	51& e	 	 	
 	59!D	 	 	
 	e4(d	 	 	
 	U;"L	 	 	
 	u5'c	 	 	
 	E:!n	 	 	
 	e4"n	 	 	
 	u5 C	 	 	
 	U3&I	 	 	
 	59!M	 	 	
 	e4- B	 	 	
 	%@)H	 	 	
r   N)T)r
   r   r   __doc__rI   rL   rN   r   rQ   staticmethodrh   r   rq   r   r   r   r(   r(   (   sV    1-(^J@./ . 3 3j w
n w
 w
r   r(   N)argparser   enumr   r   r   r(   r   r   r   <module>rv      s/   
 $  )d )"l
 l
r   