
    g2                     j    d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	m
Z
  ee      Z G d de      Zy)	    )	getLogger)DictOptional)Fusion)helper)	OnnxModelc                   z     e Zd Zdef fdZdedefdZdee   fdZ	dededee   fdZ
dededee   fd	Z xZS )
FusionFastGelumodelc                 (    t         |   |dd       y )NFastGeluTanh)super__init__)selfr   	__class__s     ]/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_fastgelu.pyr   zFusionFastGelu.__init__   s    
F3    input_name_to_nodesoutput_name_to_nodec                 |    | j                  |||      ry | j                  |||      ry | j                  |||      ry y )N)fuse_1fuse_2fuse_3)r   	tanh_noder   r   s       r   fusezFusionFastGelu.fuse   sG    ;;y"57JK;;y"57JK;;y"57JK Lr   returnc                 *   |j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j	                  |d      sy|j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j                  |dd|      }|y| j                  j                  |d      }|dk  ry|j                  |dk(  rdnd   }	| j                  j                  ||dk(  rdnd|      }
| j                  j                  |dd|      }|y| j                  j                  |dd	
      }|dk  ry| j                  j                  |d|dk(  rdnd|      }|y| j                  j                  |dd||
r|
gng       }|y| j                  j                  |dd	
      }|dk  ry| j                  j                  |d|dk(  rdnd|      }|y| j                  j	                  |d      sy|j                  d   |	k7  ry||||||||g}| j                  j                  ||j                   d   g||      sy| j                  j                  |       t        j                  d|	g|j                   | j                  j                  d            }d|_        | j                   j#                  |       | j$                  | j&                  |j(                  <   y)aj  
        Fuse Gelu with tanh into one node:
              +---------------------------+
              |                           |
              |                           v
            [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul
              |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)     ^
              |                                                              |
              +------> Mul(B=0.5)--------------------------------------------+
        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   N   Add      ?Mul      ?+ݓ?-C6?deltaexclude,C?Pow      @r   inputsoutputsnamecom.microsoftT)outputlenop_typer   has_constant_inputmatch_parentfind_constant_inputinput
get_parentis_safe_to_fuse_nodesnodes_to_removeextendr   	make_nodecreate_node_namedomainnodes_to_addappendthis_graph_namenode_name_to_graph_namer0   )r   r   r   r   childrenadd_after_tanhmul_after_tanhmul_halfi
root_input	root_nodemul_before_tanhadd_before_tanhmul_after_powpowsubgraph_nodes
fused_nodes                    r   r   zFusionFastGelu.fuse_1   sB    A&99&y'7'7':;x=A!!4!4!=!!zz,,^SA  #+>>&~'<'<Q'?@x=A!!4!4!=!!::**>5$H[\JJ**8S9q5^^aAQ7
 JJ))(aAQH[\	**11)UAGZ["JJ**?F&*Qq5**11/5qTUv![\^qr"

//#,YK" 0 
  JJ**=&*Oq5jj%%mUaAQPcd;zz,,S#699Q<:% 	
 zz//""1%&	
 ##N3%%<")),,Z8	

 ,
  ,8<8L8L$$Z__5r   c                    |j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j	                  |d      sy|j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j                  |d      }|dk  ry|j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j                  ||j                  d   |j                   d   k(  rdnd|      }	|	y| j                  j                  |dd|      }
|
y| j                  j                  |
dd	
      }|dk  ry| j                  j                  |
d|dk(  rdnd|      }|y| j                  j                  |dd||	g      }|y| j                  j                  |dd	
      }|dk  ry| j                  j                  |d|dk(  rdnd|      }|y| j                  j	                  |d      sy|j                  d   |	j                   d   k7  ry|||||
|||g}| j                  j                  ||j                   d   g||      sy| j                  j                  |       t        j                  d|	j                   d   g|j                   | j                  j                  d            }d|_        | j                   j#                  |       | j$                  | j&                  |j(                  <   y)a  
        This pattern is from Tensorflow model.
        Fuse Gelu with tanh into one node:
              +---------------------------+
              |                           |
              |                           v
            [root] --> Pow --> Mul -----> Add  --> Mul --> Tanh --> Add --> Mul(B=0.5)-->Mul-->
              |       (Y=3)   (B=0.0447...)       (B=0.7978...)    (B=1)                  ^
              |                                                                           |
              +---------------------------------------------------------------------------+
        Note that constant input for Add and Mul could be first or second input: like either A=0.5 or B=0.5 is fine.
        r   Nr   r    r!   r"   r#   r$   r%   r&   r(   r*   r+   r,   r   r-   r1   T)r2   r3   r4   r   r5   r7   r9   r8   r6   r:   r;   r<   r   r=   r>   r?   r@   rA   rB   rC   r0   )r   r   r   r   rD   rE   rG   rH   mul_after_mul_halfrJ   rK   rL   rM   rN   rO   rP   s                   r   r   zFusionFastGelu.fuse_2   s    A&99&y'7'7':;x=A!!4!4!=!!zz,,^SA  #+>>&~'<'<Q'?@x=A!!4!4!=A;JJ**8S9q5??1%88&xq'9:x=A!!4!4!=%a[JJ))#))!,0BBA
	
 **11)UAGZ["JJ**?F&*Qq5**11/5qTUv![\^qr"

//Nalukv/w JJ**=&*Oq5jj%%mUaAQPcd;zz,,S#699Q<9++A.. 	
 zz//&&q)*	
 ##N3%%$$Q'(&--,,Z8	

 ,
  ,8<8L8L$$Z__5r   c           	         |j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j	                  |d      sy|j                   d   |vry||j                   d      }t        |      dk7  s|d   j                  dk7  ry|d   }| j                  j                  |dd|      }|y| j                  j                  |d      }|dk  ry|j                  |dk(  rdnd   }	| j                  j                  |dd|      }
|
y| j                  j                  |
dd|      }|y| j                  j                  |d      }|dk  ry| j                  j                  |
dd|      }|y| j                  j                  |dd	
      }|dk  ry|j                  |dk(  rdnd   |	k7  ry| j                  j                  |d|dk(  rdnd|      }|y|j                  d   |	k(  rd}n|j                  d   |	k(  rd}ny| j                  j                  |d||      }|y| j                  j                  |dd	
      }|dk  ry|j                  |dk(  rdnd   |	k7  ry||||
|||||g	}| j                  j                  ||j                   d   g||      sy| j                  j                  |       t        j                  d|	g|j                   | j                  j                  d            }d|_        | j                  j!                  |       | j"                  | j$                  |j&                  <   y)a  
        OpenAI's gelu implementation, also used in Megatron:
           Gelu(x) = x * 0.5 * (1.0 + torch.tanh(0.79788456 * x * (1.0 + 0.044715 * x * x)))

        Fuse subgraph into a FastGelu node:
            +------------ Mul (B=0.79788456) -------------------+
            |                                                   |
            +-------------------------------+                   |
            |                               |                   |
            |                               v                   v
          [root] --> Mul (B=0.044715) --> Mul --> Add(B=1) --> Mul --> Tanh --> Add(B=1) --> Mul-->
            |                                                                                 ^
            |                                                                                 |
            +-----------> Mul (B=0.5) --------------------------------------------------------+
        r   Nr   r    r!   r"   r#   r$   r%   r&   r*   r   r-   r1   T)r2   r3   r4   r   r5   r6   r7   r8   r:   r;   r<   r   r=   r>   r?   r@   rA   rB   rC   r0   )r   r   r   r   rD   rE   mul_lastrG   rH   rI   rK   add_1jmul_7978kmul_before_add_1anothermul_0447mrO   rP   s                        r   r   zFusionFastGelu.fuse_3   s     A&99&y'7'7':;x=A!!4!4!=!!zz,,^SA  #+>>&~'<'<Q'?@x=A!!4!4!=A;::**8UDBUVJJ**8S9q5^^aAQ7
**11)UAGZ["

''FYZ=JJ**5#6q5::**?E4I\]JJ**8V6*Jq5>>qAv!1-;::225%a1fRSUhi#!!!$
2G##A&*4G::**+;UGM`aJJ**8V6*Jq5>>qAv!1-; 

 zz//__Q 	
 ##N3%%<OO,,Z8	

 ,
  ,8<8L8L$$Z__5r   )__name__
__module____qualname__r   r   r   r   r   boolr   r   r   __classcell__)r   s   @r   r
   r
      s    4i 44 d jXVZ^ jXkT kPT kYabfYg kZrT rPT rYabfYg rr   r
   N)loggingr   typingr   r   fusion_baser   onnxr   
onnx_modelr   r]   loggerr
    r   r   <module>ri      s.   
  !    	8	YV Yr   