
    g                         d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZ  ee      Z G d de      Z G d	 d
e      Zy)    )	getLogger)DictList)Fusion)FusionUtils)	NodeProtoTensorProtohelper)	OnnxModelc                   R     e Zd Zdef fdZdedeeee   f   deeef   fdZ	 xZ
S )FusionTransposemodelc                 (    t         |   |dd       y )N	Transposesuper__init__selfr   	__class__s     ^/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_transpose.pyr   zFusionTranspose.__init__   s    [9    transpose_nodeinput_name_to_nodesoutput_name_to_nodec                    |}|j                   d   |vry||j                   d      }|j                  dk7  rd}nS|}| j                  j                  ||      }|rt	        |      dkD  ry|j                   d   |vry||j                   d      }|j                  dk7  ryt        j                  |d      }t        |t              sJ t        j                  |d      }	t        |	t              sJ t	        |	      t	        |      k(  sJ g }
t        |      D ]  \  }}|
j                  |	|           |>t        j                  | j                  |||      rY| j                  j                  |       n=t        j                  | j                  |||      r| j                  j                  |       |j                  d       |j                  j!                  t#        j$                  d|
      g       y)a  
        Note that onnxruntime will do comprehensive transpose optimization after loading model.
        The purpose of this fusion is to make graph clean before running onnxruntime.

        Case 1:
              (input)-->Transpose(perm=a)-->Transpose(perm=b)-->
        After:
              (input)-->Transpose(perm=a)-->  (this path can be removed if the output is not used anymore)
                |
                +----->Transpose(perm=a*b)-->

        Case 2 (Cast has only one child):
              (input)-->Transpose(perm=a)--> Cast -->Transpose(perm=b)-->
        After:
              (input)-->Transpose(perm=a)-->  (this path can be removed if the output is not used anymore)
                |
                +----->Cast --> Transpose(perm=a*b)-->
        r   NCast   r   perm	attribute)inputop_typer   get_childrenlenr   get_node_attribute
isinstancelist	enumerateappendr   skip_parentnodes_to_remove
ClearFieldr    extendr
   make_attribute)r   r   r   r   transpose_btranspose_a	cast_nodecast_childrenpermutationparent_permutationoutput_permutation_jindexs                r   fusezFusionTranspose.fuse   s   0 %Q'::)+*;*;A*>?&(I#I JJ33I?RSM]!3a!7q!)<<-iooa.@AK+-22;G+t,,,&99+vN,d333%&#k*::::";/IB%%&8&?@ 0 &&tzz;M`a$$++K8&&tzz9kK^_$$++K8{+$$f&;&;FDV&W%XYr   )__name__
__module____qualname__r   r   r   r   strr   r8   __classcell__r   s   @r   r   r      sO    :i :AZ!AZ "#tI"67AZ "#y.1	AZr   r   c                   j     e Zd Zdef fdZd
dedee   fdZde	de
eee	   f   de
ee	f   fd	Z xZS )FusionInsertTransposer   c                 (    t         |   |dd       y )N 	GroupNormr   r   s     r   r   zFusionInsertTranspose.__init__Z   s    K0r   
input_namer   c                     | j                   j                  d      }||dz   dz   |z   }t        j                  d|g|g|      }|j                  j                  t        j                  d|      g       |S )z&Append a Transpose node after an inputr   _out-)inputsoutputsnamer   )r   create_node_namer
   	make_noder    r-   r.   )r   rD   r   output_name	node_namer   s         r   create_transpose_nodez+FusionInsertTranspose.create_transpose_node]   sw    JJ//<	#f,s2Z?K))+zlU`Tahqr  '')>)>vt)L(MNr   group_norm_noder   r   c                    | j                   j                  |g dg d      }|y|\  }}}}}	| j                   j                  |j                  d         ryt	        j
                  |d      }
t        |
t              sJ |
g dk7  ryt        |j                        dk(  r| j                   j                  |j                  d         d	k(  rt        |j                        dk(  r| j                   j                  |j                  d         dk(  rxt        | j                   j                  |	|            dk(  rPt        | j                   j                  ||            dk(  r(t        | j                   j                  ||            dk(  syd
}| j                   j                  |      &| j                  |t        j                  dgdgd       d}| j                   j                  |      &| j                  |t        j                  dgdgd       d|j                  d<   d
|j                  d<   | j                   j!                  d      dz   }| j                   j#                  |j                  d   |       | j%                  |j                  d   g d|      }| j                   j'                  || j(                         | j+                  d       y)a  
        This optimization will insert an Transpose, and onnxruntime transpose optimizer will remove it together with
        another Transpose so that we can get effect of reducing one Transpose after onnxruntime optimization.
        Before:
            --> Gemm --> Unsqueeze(axes=[2]) --> Unsqueeze(axes=[3]) --> Add --> Transpose([0,2,3,1]) --> GroupNorm
        After:
            --> Gemm --> Unsqueeze(axes=[1]) --> Unsqueeze(axes=[2]) -->Transpose([0,3,1,2]) --> Add --> Transpose([0,2,3,1]) --> GroupNorm
        )r   Add	UnsqueezerS   Gemm)r   r   Nr   r   Nr   r   )r         r   rU   r   rV   ort_const_unsqueeze_axes_1F)rJ   	data_typedimsvalsrawort_const_unsqueeze_axes_2r   _NCHW)r   rV   r   rU   zInsert Transpose)r   match_parent_pathfind_graph_outputoutputr   r%   r&   r'   r$   r!   get_constant_valuer#   get_initializeradd_initializerr	   INT64rK   replace_input_of_all_nodesrO   add_nodethis_graph_nameincrease_counter)r   rP   r   r   	gemm_path	transposeaddunsqueeze_3unsqueeze_2gemmr3   axes_1axes_2transpose_output_namenew_transposes                  r   r8   zFusionInsertTranspose.fusef   s    JJ00SUg
	 9B6	3[$::''(:(:1(=>229fE+t,,,,& !!"a'

--k.?.?.BCqHK%%&!+

--k.?.?.BCqHDJJ++D2EFG1LDJJ++K9LMNRSSDJJ++K9LMNRSS .::%%f-5  %++SS !  .::%%f-5  %++SS !   <!;! $

 ; ;K H7 R

--k.@.@.CEZ[22;3E3Ea3H,Xmn

M4+?+?@01r   )N)r9   r:   r;   r   r   r<   r   intrO   r   r   r8   r=   r>   s   @r   r@   r@   Y   sb    1i 1 49 B2"B2 "#tI"67B2 "#y.1	B2r   r@   N)loggingr   typingr   r   fusion_baser   fusion_utilsr   onnxr   r	   r
   
onnx_modelr   r9   loggerr   r@    r   r   <module>r|      sE       $ / /  	8	EZf EZPO2F O2r   