
    g~^                        d dl Z d dlZd dlZd dlZd dlmZ d dlZd dlZd dlm	Z	m
Z
mZmZmZmZmZ d dlmZmZ d dlmZ  ej*                  e      Zd ZddZddZd	 Zg d
ZdgddgddgdZ G d d      Z	 	 	 	 	 	 	 	 	 ddZddZ y)    N)Dict)AttributeProto
GraphProto
ModelProto	NodeProtoTensorProtohelpernumpy_helper)infer_shapesinfer_shapes_path)versionc           
          | D cg c]8  }t        t        |j                  d            dd j                  d      d      : c}S c c}w )z|
    Convert numpy float16 to python int.

    :param np_list: numpy float16 list
    :return int_list: python int list
    H   N   )intbinviewzfill)np_list_s     U/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/float16.py_npfloat16_to_intr      sC     =DDGqCAFF3K $**2.2GDDDs   =Ac           	      R   d }| t        j                  | dkD           j                  d   dkD  r| t        j                  | dkD           j                         }| t        j                  | dkD           j	                         }||k\  rt
        j                  d| d|        ||k  rt
        j                  d| d|        | t        j                  | dk           j                  d   dkD  r| t        j                  | dk           j                         }| t        j                  | dk           j	                         }|| k  rt
        j                  d| d|         || k\  rt
        j                  d| d|         t        j                   |d| |      ||       } t        j                   || | d      | |       } t        j                   ||| t        d            ||       } t        j                   |t        d      | |       | |       } t        j                  |       S )a?  
    Convert float32 numpy array to float16 without changing sign or finiteness.
    Positive values less than min_positive_val are mapped to min_positive_val.
    Positive finite values greater than max_finite_val are mapped to max_finite_val.
    Similar for negative values. NaN, 0, inf, and -inf are unchanged.
    c                 :    t        j                  | |k  ||k        S N)nplogical_and)abcs      r   betweenz&convert_np_to_float16.<locals>.between0   s    ~~a!eQU++    r   zthe float32 number z will be truncated to infz-inf)	r   whereshapemaxminloggerdebugfloatfloat16)np_arraymin_positive_valmax_finite_valr"   positive_maxpositive_minnegative_maxnegative_mins           r   convert_np_to_float16r4   (   s	   , A&'--a014A 67;;=A 67;;=>)LL.|n<RSaRbcd++LL.|n<RScRdefA&'--a014A 67;;=A 67;;=N?*LL.|n<RTbSbRcde,,,LL.|n<RTdSdRefgxx8-=>@PRZ[Hxx!1 18Q?BRART\]Hxx%,GYabHxxfx.IN?\deH::hr#   c                    t        | t              st        dt        |              | j                  t        j
                  k(  rt        j                  | _        | j                  rSt        t        j                  | j                        ||      }t        |      }|| j                  dd g | j                  dd | j                  rCt        j                  | j                  d      }t        |||      }|j                         | _        | S )a  Convert tensor float to float16.

    Args:
        tensor (TensorProto): the tensor to convert.
        min_positive_val (float, optional): minimal positive value. Defaults to 1e-7.
        max_finite_val (float, optional): maximal finite value. Defaults to 1e4.

    Raises:
        ValueError: input type is not TensorProto.

    Returns:
        TensorProto: the converted tensor.
    3Expected input type is an ONNX TensorProto but got Nfloat32dtype)
isinstancer   
ValueErrortype	data_typeFLOATFLOAT16
float_datar4   r   arrayr   
int32_dataraw_data
frombuffertobytes)tensorr.   r/   float16_dataint_listfloat32_listfloat16_lists          r   convert_tensor_float_to_float16rK   J   s     fk*NtTZ|n]^^;,,,&..0&:K:K1LN^`noL(6H#+Fa #%Fa ??==	JL0?OQ_`L*224FOMr#   c                     t        j                  |       j                  }t        j                  | j
                  | j                  |      S r   )r
   to_arrayr&   r	   make_tensor_value_infonamer=   )rF   r&   s     r   make_value_info_from_tensorrP   o   s7    !!&)//E((f6F6FNNr#   )ArrayFeatureExtractor	BinarizerCastMapCategoryMapperDictVectorizerFeatureVectorizerImputerLabelEncoderLinearClassifierLinearRegressor
NormalizerOneHotEncoderRandomUniformLikeSVMClassifierSVMRegressorScalerTreeEnsembleClassifierTreeEnsembleRegressorZipMapNonMaxSuppressionTopKRoiAlignRangeCumSumMinMaxUpsampler      )Resize	GroupNormSkipGroupNormc                   (    e Zd ZdZdefdZdefdZy)InitializerTrackerz'Class for keeping track of initializer.initializerc                 .    || _         g | _        g | _        y r   )rr   
fp32_nodes
fp16_nodes)selfrr   s     r   __init__zInitializerTracker.__init__   s    &r#   nodec                 v    |r| j                   j                  |       y | j                  j                  |       y r   )rt   appendru   )rv   rx   is_node_blockeds      r   add_nodezInitializerTracker.add_node   s)    OO""4(OO""4(r#   N)__name__
__module____qualname____doc__r   rw   r   r|    r#   r   rq   rq      s    1K 
)Y )r#   rq   c
                 "    |dk\  sJ d       |t        t        j                  t        j                        j                        k  sJ d       |i n|}
t        | t              r| }t        j                  t        j                        t        j                  d      k\  rn|slt        j                  t        j                  j                  |            5 }|j                   }t#        ||       t        j$                  |      } d}ddd       nt        j$                  |      } t        | t&              st)        dt+        |              d}|sAt        j                  t        j                        t        j                  d	      k\  r	 t,        }	 |t.        }|g }t1        |      }t1        |      }t2        j5                  d
| d| d| d| d| d| d|        g }g }g }g }| ||       } |j7                  |        i }t1               }t1               }| j8                  j:                  D cg c]@  }|j*                  j<                  j>                  t@        jB                  k(  s5|j                   B }}| j8                  jD                  D cg c]@  }|j*                  j<                  j>                  t@        jB                  k(  s5|j                   B }}t        |tF              r)|D cg c]	  }||v s| }}|D cg c]	  }||v s| }}n|sg }g }tI        | j8                  j:                        D ]<  \  }}|j                   |v sdt        |      z   }|||j                   <   |jK                  |j                          dt        |      z   }| j8                  jL                  jK                         }|jO                  |       ||_        t@        jP                  |j*                  j<                  _        tS        jT                  d|j                   g|gt@        jP                  |      g}| j8                  jV                  jY                  |       |j7                  |       |jK                  |       ? tI        | j8                  jD                        D ].  \  }}|j                   |v sdt        |      z   }|||j                   <   |jK                  |j                          dt        |      z   }| j8                  jL                  jK                         }|jO                  |       ||_        t@        jP                  |j*                  j<                  _        tS        jT                  d|g|j                   gd|      g}| j8                  jV                  jY                  |       |j7                  |       |jK                  |       1 i }|rg } |D ]  }!t        |!t&              r| j7                  |!j8                         t        |!tZ              r>|!j\                  D ]H  }|j^                  t@        jB                  k(  s!|j                   |vsJ ta        |      ||j                   <   J |!jV                  D ]  }|j                   |v rtc        te        |j:                              D ]3  }|j:                  |   |v s||j:                  |      |j:                  |<   5 tc        te        |jD                              D ]3  }|jD                  |   |v s||jD                  |      |jD                  |<   5 |jf                  |v xs |j                   |v }"tI        |j:                        D ]e  \  }}||v s|"xsB |th        jk                  |jf                  g       v xr ||
jk                  |jf                  g       v}#||   jm                  ||#       g |"r|j7                  |       k|jf                  dk(  rU|jn                  D ]F  }$|$j                   dk(  s|$jp                  t@        jB                  k(  s1t@        jP                  |$_8         n |jf                  dv rd}%|jn                  D ]G  }$|$j                   dk(  sd}%|$jp                  t@        jB                  k(  s3t@        jP                  |$_8        I |jf                  dv r@|%s>|jn                  jY                  tS        jr                  dt@        jP                        g       |jf                  th        vs|jf                  |
v r$|jn                  D ]  }$| j7                  |$        |j7                  |        t        |!tt              r| j7                  |!jv                         |!jx                  D ]  }| j7                  |        |!jz                  jO                  t}        |!jz                  ||             |!j~                  D ]  }t}        |||      } t        |!tZ              s+t        j                  |!j:                  |!jD                  |!jL                        D ];  }|j*                  j<                  j>                  t@        jB                  k(  rH|j                   |vr:t@        jP                  |j*                  j<                  _        |j7                  |       |j*                  j                  d      s|j*                  j                  j>                  j<                  j>                  t@        jB                  k(  s|j                   |vst@        jP                  |j*                  j                  j>                  j<                  _        |j7                  |       >  | }|r|j                         D ]  }&|s|&j                  st}        |&j\                  ||      |&_.        |j7                  t        |&j\                               |&j                  s_|rbt2        j                  d|&j                           |D ]\  }'tI        |'j:                        D ]@  \  }}|th        |'jf                     vs||
jk                  |'jf                  g       v r;|D ]   }(||(j                   k(  s| j8                  jL                  jK                         }|jO                  |(       |'j                   dz   t        |      z   }||_        t@        jB                  |j*                  j<                  _        |'j                   d z   t        |      z   }tS        jT                  d|g|gd|      g}| j8                  jV                  jY                  |       ||'j:                  |<    @ C _ |	rt@        j                  nt@        jB                  })|D ]Y  }'tc        te        |'j:                              D ]	  }|'j:                  |   }|D ]  }(||(j                   k(  s| j8                  jL                  jK                         }|jO                  |(       |'j                   dz   t        |      z   }||_        |)|j*                  j<                  _        |'j                   d z   t        |      z   }tS        jT                  d|g|g|)|      g}| j8                  jV                  jY                  |       ||'j:                  |<    	  tc        te        |'jD                              D ]	  }|'jD                  |   }*|D ]  }(|*|(j                   k(  s| j8                  jL                  jK                         }|jO                  |(       |'j                   d!z   t        |      z   }||_        |)|j*                  j<                  _        |'j                   d"z   t        |      z   }tS        jT                  d|g|*gd#|      g}| j8                  jV                  jY                  |       ||'jD                  |<    	  \ | S # 1 sw Y   xY w# w xY wc c}w c c}w c c}w c c}w )$a  Convert tensor float type in the input ONNX model to tensor float16.

    Args:
        model (ModelProto or str): The ONNX model or path of the model to convert.
        min_positive_val (float, optional): minimal positive value. Defaults to 5.96e-08.
        max_finite_val (float, optional): maximal finite value of float16. Defaults to 65504.
        keep_io_types (Union[bool, List[str]], optional): It could be boolean or a list of float32 input/output names.
                                                          If True, model inputs/outputs should be left as float32.
                                                          Defaults to False.
        disable_shape_infer (bool, optional): Skips running onnx shape/type inference.
                                              Useful if shape inference has been done. Defaults to False.
        op_block_list (List[str], optional): List of op types to leave as float32.
                                             Defaults to None, which will use `float16.DEFAULT_OP_BLOCK_LIST`.
        node_block_list (List[str], optional): List of node names to leave as float32. Defaults to None.
        force_fp16_initializers(bool): force converting all float initializers to float16.
                                       Default to false, which will convert only the one needed to avoid precision loss.
        force_fp16_inputs(Dict[str, List[int]]): Force the conversion of the inputs of some operators to float16, even if
                                                 this script's preference it to keep them in float32.
    Raises:
        ValueError: input type is not ModelProto.

    Returns:
        ModelProto: converted model.
    "\o>zginvalid min_positive_val. smallest positive float16 value: subnormal 5.96e-08, and normalized 6.104e-05z4invalid max_finite_val. largest float16 value: 65504Nz1.8.0)dirTz$Expected an ONNX ModelProto but got z1.2.0z"fp16 parameters: min_positive_val=z max_finite_val=z keep_io_types=z disable_shape_infer=z op_block_list=z node_block_list=z force_fp16_initializers=graph_input_cast_graph_input_castCast)torO   graph_output_cast_graph_output_castrl   r   )EyeLikeMultinomialRandomNormalRandomNormalLikeRandomUniformr]   SequenceEmpty	BernoulliFr9   )r   r   r   sequence_typezXinitializer is used by both fp32 and fp16 nodes. Consider add these nodes to block list:_input_cast__input_cast_output_cast__output_cast
   )Jr+   r   finfor,   r'   r:   strr   parseonnx__version__tempfileNamedTemporaryFileospathdirnamerO   r   loadr   r;   r<   r   DEFAULT_OP_BLOCK_LISTsetr)   r*   rz   graphinputtensor_type	elem_typer   r>   outputlist	enumerateadd
value_infoCopyFromr?   r	   	make_noderx   extendr   rr   r=   rq   rangelenop_typeALWAYS_FLOAT_INPUTSgetr|   	attributeimake_attributer   ggraphstrK   tensors	itertoolschainHasFieldr   valuesru   rP   rt   infoBFLOAT16)+modelr.   r/   keep_io_typesdisable_shape_inferop_block_listnode_block_listforce_fp16_initializersforce_fp16_inputs#use_bfloat16_as_blocked_nodes_dtypeforce_fp16_inputs_dict
model_pathtmpfileshape_infer_model_pathfunc_infer_shapequeuevalue_info_list	node_listmixed_float_type_node_listname_mappinggraph_io_to_skipio_castsnfp32_inputsfp32_outputsr   output_name	node_namenew_value_infonew_node
input_namefp32_initializers
next_levelqr{   use_fp32_weightattr	has_dtypevaluerx   r   accuracy_typer   s+                                              r   convert_float_to_float16r      s   J 	H$qpq$U288BJJ#7#;#;<<t>tt<#4#<RBS%
==))*gmmG.DDM`,,1LMQX)0&!*.DE		"89&*# NM IIj)EeZ(?U}MNN7==1A1A#BgmmT[F\#\	+ -&M/*O
LL
,-=,>>N~N^^mn{m|  }R  Sf  Rg  gv  wD  vE  EV  Wf  Vg  g@  AX  @Y  	Z
 EOI
 "$ # '	LLLuuH#(;;#4#4j#4a8J8J8T8TXcXiXi8i166#4Kj$)KK$6$6l$6q!&&:L:L:V:VZeZkZk:kAFF$6Ll-&"-D+Qm1Cq+D#/F<a13E<F%++++,166[ -A6K#.L   (*SV3I"[[33779N##A&"-N8C8K8KN++5((!&&K=[M`M`gpqrHKK##H-"">2LL# -" %++,,-166\!-A6J#-L   (+c!f4I"[[33779N##A&",N8C8K8KN++5((*xAT]^_HKK##H-"">2LL# ." 8:

A!Z(!!!''*!Z(A{{k&7&77 vv->>>>4Fq4I)!&&1 '
 A vv) "3qww<0771:5)5aggaj)AAGGAJ 1 #3qxx=188A;,6*6qxx{*CAHHQK 2 '(ii=&@&]AFFoD]O)2177);:%)::.= / !%8%<%<QYY%K K !W$%-C-G-G		SU-V$V , .j9BB1oV *< '!((+99.()#'99#4;CTCT9T-8-@-@DF$) )4
 99 	) 	 ).I()#'99#704I'+vv1B1B'B1<1D1D	 )4 !"		-_ _ir ! 2 2F4I4I'S^SfSf4g3h i 99,??199PfCf() * 1 1$ 7 )4 7==a@u  | !^,!!!##&A%%a( "<QSSBRTbcdA7;K^\A # !Z( #!((ALLIAvv))33{7H7HH66)99;F;N;NAFF..8+2215vv766//99EEOOS^SdSdd vv-==WbWjWj 4 4 > > J J T / 6 6q 9 Jk @ E H #))+"e&6&6 ?@Q@QSces tE""#>u?P?P#QR(?notoo  oA  B , +&tzz2MAz+DLL99QBXB\B\]a]i]ikmBn=n-
0%*[[%;%;%?%?%AN"++J7"&))n"<s1v"EK*5N'@K@Q@QN''33= $		M 9CF BI & 0 0*}YZaj klHKK$$++H5$/DJJqM . 3 +( -PK((U`UfUfM s4::'AAJ-
0%*[[%;%;%?%?%AN"++J7"&))n"<s1v"EK*5N'@MN''33= $		M 9CF BI & 0 0*}Yfmv wxHKK$$++H5$/DJJqM . (& s4;;'(A[[^F-
Z__,%*[[%;%;%?%?%AN"++J7!%_!<s1v!EJ*4N'@MN''33= $		N :SV CI & 0 0*xTV]f ghHKK$$++H5%/DKKN . )/ P LU NM" @ klDFsN   0+8 *6=!=	6A@ A@$	A@.A@8	A@A@+58:c                    t        | t              st        dt        |              | j                  t        j
                  k7  rt        d      d}| j                  rt        j                  | j                        }| j                  r!t        j                  | j                  d      }|t        d      t        |||      }t        j                  t        j                  |t        j                  |      z
              S )zSMeasure the maximum absolute difference after converting a float tensor to float16.r6   z#Expected tensor data type is float.Nr7   r8   zexternal data not loaded!)r:   r   r;   r<   r=   r>   r@   r   rA   rC   rD   RuntimeErrorr4   amaxabsr7   )rF   r.   r/   float32_datarG   s        r   float_to_float16_max_diffr     s    fk*NtTZ|n]^^;,,,>??Lxx 1 12}}V__IF677(7GXL77266,L)AABCCr#   )r        @)	r   r   FFNNFNF)!r   loggingr   r   typingr   numpyr   r   r   r   r   r   r   r	   r
   onnx.shape_inferencer   r   	packagingr   	getLoggerr}   r)   r   r4   rK   rP   r   r   rq   r   r   r   r#   r   <module>r      s      	     e e e @ 			8	$E D"JO
 B #$Aq6QPQFS ) )" !(-yx	Dr#   