
    g0                         d dl mZ d dlmZmZ d dlZd dlmZmZ d dlm	Z	m
Z
mZmZ d dlmZ d dlmZ  ee      Z G d d	      Z G d
 d      Zy)    )	getLogger)OptionalTupleN)array_equalndarray)	NodeProtoTensorProtohelpernumpy_helper)onnx_pb)	OnnxModelc            
       .   e Zd ZdefdZdedeeef   fdZddefdZ		 	 	 ddede
d	ee   d
ee   fdZdefdZdefdZed        Zeddefd       Zeddefd       Zedej(                  fd       Zed dedefd       Zde
fdZd Zd Zd Zd Zy)!FusionUtilsmodelc                     || _         y N)r   )selfr   s     Z/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/fusion_utils.py__init__zFusionUtils.__init__   s	     %
    
input_namereturnc                 B   | j                   j                  |      }|b|j                  j                  j                  t
        j                  k7  r1| j                  |      \  }}t        j                  d| d       d|fS t        j                  d| d|d u        d|fS )NzCasted graph input z	 to int32TzDid not cast graph input z to int32: found F)
r   find_graph_inputtypetensor_type	elem_typer	   INT32cast_input_to_int32loggerdebug)r   r   graph_inputcast_output	cast_nodes        r   cast_graph_input_to_int32z%FusionUtils.cast_graph_input_to_int32   s    jj11*="{'7'7'C'C'M'MQ\QbQb'b%)%=%=j%I"KLL.zl)DE$$0<MkaeNeMfghj  r   c                    |dz   |z   }|dk(  rt        t        j                        }nI|dk(  rt        t        j                        }n*|dk(  rt        t        j                        }nt        d      | j                  |||      }||fS )N_int32float32float16z"Invalid target_type: {target_type})intr	   r   FLOATFLOAT16
ValueErroradd_cast_node)r   r   target_typeoutput_nameto_typer$   s         r   
cast_inputzFusionUtils.cast_input   s     3&4'!+++,GI%+++,GI%+--.GABB&&z7KH	I%%r   Nr2   r1   
graph_namec                 x   ||d| z   }|g}|| j                   j                         }||v r&||   }|r|j                  dk(  r|j                  d   g}t	        j
                  d||g      }|j                  j                  t	        j                  d|      g       | j                   j                  ||       |S )N	_cast_to_Castr   )inputsoutputsto)r4   )
r   output_name_to_nodeop_typeinputr
   	make_node	attributeextendmake_attributeadd_node)	r   r   r2   r1   r;   r4   r8   parent_noder$   s	            r   r/   zFusionUtils.add_cast_node/   s     $7)'<<K &"&**"@"@"B,,-j9K{22f<%++A./$$VF[MR	""F$9$9$$H#IJ

I*=r   c                 &    | j                  |d      S )Nr(   )r3   )r   r   s     r   r   zFusionUtils.cast_input_to_int32J   s    z733r   c                    | j                   j                         }||   }|D ]  }|j                  dk(  sd}|j                  D ]<  }|j                  dk(  s|j
                  t        t        j                        k(  s:d} n |sc|j                  d   }| j                   j                  |       | j                   j                  ||        y )Nr7   Fr:   Tr   )r   input_name_to_nodesr<   r?   nameir+   r	   r   outputremove_nodereplace_input_of_all_nodes)r   r   rF   nodesnodeis_int32attr1   s           r   remove_cast_int32zFusionUtils.remove_cast_int32M   s    "jj<<>#J/D||v% >>Cxx4'CEES9J9J5K,K#' * "&++a.KJJ**40JJ99+zR r   c                 *   d}| j                   |   |v rP| || j                   |      v r<|| j                   |      j                  |        t        || j                   |            }|| j                   |<   ||v r||   j                  |        |S | g||<   |S )Nr   )r=   removelenappend)rM   rH   new_input_namerF   old_input_references        r   update_node_inputzFusionUtils.update_node_input\   s    JJqM00d>QRVR\R\]^R_>`6`

1.55d;"%&9$**Q-&H"I&

100/66t< #" 48&/""r   c                     |j                   |   }|j                   |   }t        j                  ||||      }|dk(  xr | j                  |       }	|	S )a  
        Before:
              (input)-->parent-->node-->(output)
        After:
              (input)-->parent-->
                |
                +----->node-->(output)

        This function returns a flag whether the parent node can be removed.
        r   )r=   r   rW   find_graph_output)
r   rM   rC   rF   node_input_indexparent_input_indexold_input_namerU   rV   parent_can_be_removeds
             r   skip_parentzFusionUtils.skip_parentl   se     $45$**+=>);;DBRTbdwx "5!9 j5CZCZ[iCj?j$$r   attribute_namec                     |}| j                   D ]'  }|j                  |k(  st        j                  |      }) t	        |t
              r&t	        |t        t
        f      xr t        ||d      S ||k(  S )a  Verify that a node has expected value for an attribute.

        Args:
            node (NodeProto): a node to check
            attribute_name (str): name of attribute
            expected_value (Any): expected value of the attribute
            default_value (Any, optional): default value if the attribute does not exist. Defaults to None.

        Returns:
            bool: whether the check is passed or not
        F	equal_nan)r?   rG   r
   get_attribute_value
isinstancelistr   r   )rM   r_   expected_valuedefault_valuevalueattrs         r   check_node_attributez FusionUtils.check_node_attribute   sk     NNDyyN*2248 # nd+uwo6oKX]in<ooN**r   tensorc                    t        | t        j                        st        dt	        |              t        | j                        dk7  s'| j                  t        j                  j                  k7  rt        d      | j                  rnt        j                  t        j                  | j                  d      | j                        }t        j                  |ddg      }|j                         | _	        | S t        d      )	zTranspose a 2-D INT8 TensorProto
        Args:
            tensor (TensorProto): tensor to be transposed
        Returns:
            tensor (TensorProto): transposed tensor
        z3Expected input type is an ONNX TensorProto but got    z'Only INT8 2-D tensors can be transposedint8)dtype   r   zonly raw buffer supported)rd   
onnx_protor	   r.   r   rS   dims	data_typeINT8raw_datanumpyreshape
frombuffer	transposetobytes)rk   
int32_dataint32_transposed_datas      r   transpose_2d_int8_tensorz$FusionUtils.transpose_2d_int8_tensor   s     &*"8"89RSWX^S_R`abbv{{q F$4$4
8N8N8S8S$SFGG??u'7'7v'VX^XcXcdJ$)OOJA$G!3;;=FO
  899r   rM   c                    | j                   dvr"t        j                  d| j                           |j                  | j                  d         }|y|j
                  dk(  xs# |j
                  dk(  xr |j                  d   dk(  }|r|syt        | j                        dk(  ry|j                  | j                  d         }|j
                  |j
                  k7  ry|yt        j                  |dk(        S )a  Verify if a provided QuantizeLinear (Q) / DequantizeLinear (DQ) node is a good candidate for fusion.
           It is a good candidate for fusion if:
           (1) The Q/DQ node is for per-tensor quantization if allow_per_tensor_quantization_only is `True`
           (2) The Q/DQ node should have constant scale
           (3) The Q/DQ node should have a zero point of 0
        Args:
            node (NodeProto): a Q/DQ node to check
        Returns:
            bool: whether the check is passed or not
        >   QuantizeLinearDequantizeLinearz+Provided node is not a Q/DQ node. Op Type: rp   Fr   rm   T)
r<   r    r!   get_constant_valuer=   ndimshaperS   rv   all)rM   r   "allow_per_tensor_quantization_onlyscalescale_has_single_element
zero_points         r   check_qdq_node_for_fusionz%FusionUtils.check_qdq_node_for_fusion   s     <<EELLFt||nUV((A7 = $)::?#_uzzQ7^5;;WX>]^K^ -6N tzz?a --djjm<
 ::( yyq))r   input_indexc                     t        |j                        |kD  sJ | j                  j                  |j                  |         }t	        |t
              r&t	        |t        t
        f      xr t        ||d      S ||k(  S )a7  Verify that a node has expected input value

        Args:
            node (NodeProto): a node to check
            input_index (int): index of its input to be verified
            expected_value (Any): expected value of the input

        Returns:
            bool: whether the check is passed or not
        Fra   )rS   r=   r   r   rd   re   r   r   )r   rM   r   rf   rh   s        r   check_node_input_valuez"FusionUtils.check_node_input_value   sm     4::,,,

--djj.EFnd+uwo6oKX]in<ooN**r   c                    g }| j                   j                         }| j                   j                         D ]k  }|j                  dk(  s|j                  d   |vs%| j                   j                  |j                  d   |j                  d          |j                  |       m |r>| j                   j                  |       t        j                  dt        |       d       yy)z>Remove Identity nodes, except those right before graph output.Identityr   zRemoved z Identity nodesN)r   get_graphs_output_namesrL   r<   rI   rK   r=   rT   remove_nodesr    inforS   )r   nodes_to_removegraph_output_namesrM   s       r   remove_identity_nodesz!FusionUtils.remove_identity_nodes   s    !ZZ??AJJ$$&D||z);;q>);;JJ99$++a.$**UV-X#**40	 ' JJ##O4KK(3#7"8HI r   c                 8    | j                   j                          y r   )r   remove_cascaded_cast_nodesr   s    r   r   z&FusionUtils.remove_cascaded_cast_nodes   s    

--/r   c                 8    | j                   j                          y r   )r   remove_useless_cast_nodesr   s    r   r   z%FusionUtils.remove_useless_cast_nodes  s    

,,.r   c                 H   | j                   j                  d      }|yg }| j                   j                         D ]  }|j                  dk(  s|j	                  |j
                  d         }|j	                  |j                  d         }|sR|sU||k(  s[t        j                  d|j                   d|        |j                  |        |rQt        | j                   j                               }t        | j                   j                               }|D ]  }t        t        |j                        |z        rt        t        |j
                        |z        smt        | j                   j!                         |j
                  d            dk(  r7| j                   j#                  |j
                  d   |j                  d          n7| j                   j%                  |j                  d   |j
                  d          | j                   j'                  |        yy)	ziRemove reshape node that is not needed based on symbolic shape inference: input and output has same shapeT)updateNReshaper   zRemove reshape node z* since its input shape is same as output: rp   )r   infer_runtime_shaperL   r<   get_edge_shaper=   rI   r    r   rG   rT   setget_graphs_input_namesr   boolrS   rF   replace_output_of_all_nodesrK   rJ   )r   shape_inferr   rM   input_shapeoutput_shapegraph_input_namesr   s           r   remove_useless_reshape_nodesz(FusionUtils.remove_useless_reshape_nodes  s   jj44D4AJJ$$&D||y()88AG*99$++a.I<K<4OKK.tyyk9cdocpq $**40 '  #DJJ$E$E$G H!$TZZ%G%G%I!J'DKK(+==> TZZ3D!DE

 > > @A OPTUU

>>tzz!}dkkZ[n] JJ99$++a.$**UV-X

&&t, ( r   )r(   )NNN)r   r   r   )T)__name__
__module____qualname__r   r   strr   r   r%   r3   r+   r   r/   r   rP   staticmethodrW   r^   rj   rq   r	   r}   r   r   r   r   r   r   r    r   r   r   r      s=   &i &!C !E$)<L !&S &( &* $(  c]	 SM64c 4SC S # # %9 % %* +3 + +, )?)?  . (*	 (*) (* (*T+ +(J0/-r   r   c                   ,    e Zd Zeddededefd       Zy)NumpyHelperrk   
fill_zerosr   c                     |r4ddl m} t        | j                  |j                  | j
                           S t        j                  |       S )Nr   )mapping)r   ro   )onnxr   r   rr   TENSOR_TYPE_TO_NP_TYPErs   r   to_array)rk   r   r   s      r   r   zNumpyHelper.to_array'  sE     $kk44V5E5EF 
 $$V,,r   N)F)r   r   r   r   r	   r   r   r   r   r   r   r   r   &  s)    - -$ -7 - -r   r   )loggingr   typingr   r   rv   r   r   r   r   r	   r
   r   r   rq   
onnx_modelr   r   r    r   r   r   r   r   <module>r      s@   
  "  & = = &  	8	R- R-j- -r   