
    g*$                         d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlmZ d dlmZ  e j8                  e      Z G d de      Zy)    N)Optional)FusionAttentionUnet)FusionBiasAdd)FusionBiasSplitGelu)FusionGroupNorm)FusionNhwcConv)FusionOptions)FusionSkipGroupNorm)FusionInsertTransposeFusionTranspose)is_installed)
ModelProto)	OnnxModel)BertOnnxModelc                        e Zd Zddededef fdZd Zd Zd Zd Z	d	 Z
dd
ee   fdZd Zdd
ee   fdZdd
ee   fdZd Z xZS )UnetOnnxModelmodel	num_headshidden_sizec                 \    |dk(  r|dk(  s|dkD  r||z  dk(  sJ t         |   |||       y)aG  Initialize UNet ONNX Model.

        Args:
            model (ModelProto): the ONNX model
            num_heads (int, optional): number of attention heads. Defaults to 0 (detect the parameter automatically).
            hidden_size (int, optional): hidden dimension. Defaults to 0 (detect the parameter automatically).
        r   )r   r   N)super__init__)selfr   r   r   	__class__s       ]/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/onnx_model_unet.pyr   zUnetOnnxModel.__init__   sA     Q;!#3Q;YbKbfgKghh)M    c                 $    | j                          y N)remove_useless_divr   s    r   
preprocesszUnetOnnxModel.preprocess&   s    !r   c                 D    | j                          | j                          y r   )prune_graphremove_unused_constantr    s    r   postprocesszUnetOnnxModel.postprocess)   s    ##%r   c                    | j                         D cg c]  }|j                  dk(  s| }}g }|D ])  }| j                  |d      dk(  s|j                  |       + |D ].  }| j	                  |j
                  d   |j                  d          0 |r1| j                  |       t        j                  dt        |             yyc c}w )zRemove Div by 1Divg      ?   r   zRemoved %d Div nodesN)nodesop_typefind_constant_inputappendreplace_input_of_all_nodesoutputinputremove_nodesloggerinfolen)r   node	div_nodesnodes_to_removedivs        r   r   z UnetOnnxModel.remove_useless_div-   s    &*jjlLlddlle6KTl	LC''S1Q6&&s+  $D++DKKNDJJqMJ $ o.KK.O0DE  Ms
   CCc                 >    t        | d      }|j                          y )NT)update_weight)r   apply)r   conv_to_nhwc_convs     r   convert_conv_to_nhwcz"UnetOnnxModel.convert_conv_to_nhwc=   s    *4tD!r   c           	         t        |       }|j                          d}| j                  d      }|D ]  }t        j                  |d      }t        |t              sJ |t        t        t        |                  k7  rL| j                  |j                  d         s<| j                  |j                  d         s| j                  |j                  d         rJ | j                  |j                  d   |j                  d          | j                  |       |dz  } t        |j                        |z   }|rt         j#                  d|       y y )Nr   	Transposepermr(   zRemoved %d Transpose nodes)r   r:   get_nodes_by_op_typer   get_node_attribute
isinstancelistranger3   find_graph_outputr.   find_graph_inputr/   r-   remove_noder6   r1   r2   )r   fusion_transposeremove_countr)   r4   permutationtotals          r   merge_adjacent_transposez&UnetOnnxModel.merge_adjacent_transposeB   s)   *40 ))+6D#66tVDKk4000d5[)9#:;;&&t{{1~6((A7))$**Q-8  ++DKKNDJJqMJT"AL! $ $445DKK4e< r   optionsc                     |d u xs |j                   }t        | | j                  | j                  d|d      }|j	                          |d u xs |j
                  }t        | | j                  | j                  dd|      }|j	                          y )NF)is_cross_attentionenable_packed_qkvenable_packed_kvT)rP   r   r   r   r:   rQ   )r   rM   rP   self_attention_fusionrQ   cross_attention_fusions         r   fuse_multi_head_attentionz'UnetOnnxModel.fuse_multi_head_attention^   s    $_J1J1J 3NN$/"!
 	##% $tOH0H0H!4NN##-"
 	$$&r   c                 :    t        |       }|j                          y r   )r   r:   )r   fusions     r   fuse_bias_addzUnetOnnxModel.fuse_bias_addw   s    t$r   c                    t        d      rLdd l}ddlm}  |       5  d}|j                  t	        |      dd      }| j                  ||       d d d        y t        j                  d       | j                  |d        y # 1 sw Y   y xY w)Ntqdmr   )logging_redirect_tqdm   rV   )initialdescz<tqdm is not installed. Run optimization without progress bar)r   rY   tqdm.contrib.loggingrZ   rD   	_optimizer1   r2   )r   rM   rY   rZ   stepsprogress_bars         r   optimizezUnetOnnxModel.optimize{   so    B&(#yyuqxyPw5 )(
 KKVWNN7D) )(s   2B  B	c                    ||j                   s| j                          | j                  j                          |r|j	                  d       | j                  j                          |r|j	                  d       ||j                  r| j                          |r|j	                  d       ||j                  r| j                          |r|j	                  d       | j                          |r|j	                  d       | j                          |r|j	                  d       ||j                  rI|d u xs |j                  }t        | |      }|j                          t!        |       }|j                          |r|j	                  d       ||j"                  rt%        |       }|j                          |r|j	                  d       ||j&                  r| j)                  |       |r|j	                  d       ||j*                  r| j-                          |r|j	                  d       | j/                          |r|j	                  d       | j                  j1                          |r|j	                  d       ||j2                  rt5        |       }|j                          |r|j	                  d       ||j6                  r| j9                          |r|j	                  d       ||j:                  r| j=                          |r|j	                  d       ||j>                  r | jA                          | jC                          |r|j	                  d       ||jD                  r| jG                          |r|j	                  d       | jI                          |r|j	                  d       tJ        jM                  d| jO                                 y )Nr(   zopset version: )(enable_shape_inferencedisable_shape_inferenceutilsremove_identity_nodesupdateremove_useless_cast_nodesenable_layer_normfuse_layer_normenable_gelu	fuse_gelur!   fuse_reshapeenable_group_normgroup_norm_channels_lastr   r:   r   enable_bias_splitgelur   enable_attentionrT   enable_skip_layer_normfuse_skip_layer_norm
fuse_shaperemove_useless_reshape_nodesenable_skip_group_normr
   enable_bias_skip_layer_normfuse_add_bias_skip_layer_normenable_gelu_approximationgelu_approximationenable_nhwc_convr<   rL   enable_bias_addrW   r%   r1   r2   get_opset_version)r   rM   ra   channels_lastgroup_norm_fusioninsert_transpose_fusionbias_split_gelu_fusionskip_group_norm_fusions           r   r_   zUnetOnnxModel._optimize   s:   )G)G((*

((*" 	

,,."O 9 9  ""O 3 3NN"""O 9 9$_Q1Q1QM /m D##%&;D&A##))+"O = =%8%>""((*"O 8 8**73"O > >%%'"" 	

//1"O > >%8%>""((*"O C C..0"7#D#D##%"?g66%%'))+"7#:#: ""od&<&<&>%?@Ar   c                     i }g d}|D ]!  }| j                  |      }t        |      ||<   # t        j                  d|        |S )z8
        Returns node count of fused operators.
        )		AttentionMultiHeadAttentionLayerNormalizationSkipLayerNormalizationBiasSplitGelu	GroupNormSkipGroupNormNhwcConvBiasAddzOptimized operators:)r@   r3   r1   r2   )r   op_countopsopr)   s        r   get_fused_operator_statisticsz+UnetOnnxModel.get_fused_operator_statistics   sT     

 B--b1Eu:HRL  	*8*56r   )r   r   r   )NN)__name__
__module____qualname__r   intr   r!   r%   r   r<   rL   r   r	   rT   rW   rb   r_   r   __classcell__)r   s   @r   r   r      s}    
Nj 
NS 
N3 
N"&F "
=8'-1H '2* 7 *cB-!8 cBJr   r   ) loggingtypingr   fusion_attention_unetr   fusion_bias_addr   fusion_biassplitgelur   fusion_group_normr   fusion_nhwc_convr   fusion_optionsr	   fusion_skip_group_normr
   rH   r   r   import_utilsr   onnxr   
onnx_modelr   onnx_model_bertr   	getLoggerr   r1   r    r   r   <module>r      sQ      5 ) 4 - + ( 6 C %    )			8	$jM jr   