
    g<                        d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZmZ ddlZddlmZmZ ddlmZ dd	lmZmZmZmZmZmZ d
Z ee      Z 	 	 d.dee!ef   dee!ef   de
ee!      de"ddf
dZ#	 	 d/de!dede
ee!      dee!e	ejH                     f   fdZ%edz   Z&dee!ee!eejH                     f   f   de!de
eejH                        de
eejH                        ddf
dZ'	 d0dee!eejH                     f   de
ee!eejH                     f      dee!ee!eejH                     f   f   fdZ(dejH                  dejH                  d ejH                  d!e)de
ejH                     f
d"Z*d#e!d$e!dee!ee!ejH                  f   f   fd%Z+d&eeejH                     ejH                  f   d'eeejH                     ejH                  f   de,fd(Z-e-fd)ee!ee!ejH                  f   f   d*eejH                  ejH                  ge,f   dee!e,f   fd+Z.e-fd,ee!ee!eejH                     f   f   d*eeejH                     eejH                     ge,f   dee!ee!e,f   f   fd-Z/y)1a  Utilities to run a given ONNX model, while saving input/output tensors of
eligible operator nodes.

A use case is to debug quantization induced accuracy drop. An AI engineer can
run the original float32 model and the quantized model with the same inputs,
then compare the corresponding activations between the two models to find
where the divergence is.

Example Usage:

```python
    class ExampleDataReader(CalibrationDataReader):
        def __init__(self):
            ...
        def get_next(self):
            ...

    input_data_reader = ExampleDataReader()

    augmented_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_model.onnx"))
    modify_model_output_intermediate_tensors (path_to_onnx_model, augmented_model_path)

    tensor_dict = collect_activations(augmented_model_path, input_data_reader)
```

`tensor_dict` points to a dictionary where the keys are tensor names and each value
is a list of tensors, one from each model run

    N)Path)CallableDictListOptionalSequenceUnion)helpernumpy_helper   )CalibraterBaseCalibrationDataReader)	ONNXModel)DEQUANT_OP_NAMEDEQUANT_OUTPUT_SUFFIXQUANT_INPUT_SUFFIXTENSOR_NAME_QUANT_SUFFIXfind_by_nameload_model_with_shape_infer_ReshapedSavedOutputinput_model_pathoutput_model_pathop_types_for_savingsave_as_external_datareturnc                    |g }t        | |      }|j                  }|j                  |      \  }}dt        t	        j                               z   }t        j                  t        j                  dgt        j                        |      }	|j                  j                  j                  |	       |D ]  }
|
t        z   }t        j                  j!                  d|
|g|g|      }|j                  j"                  j                  |       t        j$                  |||
   j&                  j(                  j*                  dg      }|j                  j,                  j                  |        t        j.                  |||       y)	a  Augment a given ONNX model to save node input/output tensors.

    Add all input/output tensors of operator nodes to model outputs
    so that their values can be retrieved for debugging purposes.

    Args:
        input_model: the path to load the model.
        op_types_for_saving: Operator types for which the
                input/output should be saved. By default, saving all the
                float32/float16 tensors.

    Returns:
        The augmented ONNX model
    N)op_types_to_calibrateLinearReshape_dtypeReshape)inputsoutputsname)r   )r   modelselect_tensors_to_calibratestrtimer   
from_arraynumpyarrayint64graphinitializerappend_TENSOR_SAVE_POSTFIXonnxr
   	make_nodenodemake_tensor_value_infotypetensor_type	elem_typeoutputsave)r   r   r   r   savermodel_to_augmenttensorsvalue_infosreshape_shape_namereshape_shapetensor_namereshape_outputreshape_nodereshape_output_value_infos                 \/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/qdq_loss_debug.py(modify_model_output_intermediate_tensorsrF   @   sX   * " +CVWE{{ <<=MNG[)C		,<< ++EKKEKK,PRdeM&&--m<$';;{{,,!34#$	 - 
 	##**<8$*$A$AK499EEOORTQU%
! 	%%,,-FG  	II3    augmented_modelinput_readerexecution_providersc                    |3t        j                         }t         j                  j                  |_        |dg}t        j
                  | ||      }g }|D ]#  }|j                  |j                  d|             % |st        d      i }|j                         }|D ]k  }	t        ||	      D ]Z  \  }
}|
j                  j                  t              s&|
j                  dt          }|j                  |g       j                  |       \ m |S )a  Run augmented model and collect activations tensors.

    Args:
        augmented_model: Path to augmented model created by modify_model_output_intermediate_tensors ()
        input_reader: Logic for reading input for the model, augmented model have the same
            input with the original model.
        session_options: Optional OnnxRuntime session options for controlling model run.
            By default graph optimization is turned off
        execution_providers: Collection of execution providers for running the model.
            Only CPU EP is used by default.

    Returns:
        A dictionary where the key is tensor name and values are list of tensors from each batch
    NCPUExecutionProvider)sess_options	providersz3No data is collected while running augmented model!)onnxruntimeSessionOptionsGraphOptimizationLevelORT_DISABLE_ALLgraph_optimization_levelInferenceSessionr0   runRuntimeErrorget_outputszipr%   endswithr1   _TENSOR_SAVE_POSTFIX_LEN
setdefault)rH   rI   session_optionsrJ   inference_sessionintermediate_outputsinput_doutput_dictoutput_infobatchr9   output_dataoutput_names                rE   collect_activationsre   s   s   * %4463>3U3U3e3e0"56#44$% ##$5$9$9$$HI  PQQK#//1K%#&{E#:FK{{##$89$kk*D,D+DE&&{B7>>{K $; & rG   _1qdq_cmpactivation_namepre_qdq_tensorspost_qdq_tensorsc                 :    ||i | |<   || |   d<   || |   d<   y y y )Npre_qdqpost_qdq )rg   rh   ri   rj   s       rE   _add_pre_post_qdq_pairro      s@     #(C#% .= +/? , )D#rG   qdq_activationsfloat_activationsc                 `   i }| j                         D ]  \  }}|j                  t              r5|dt        t                }| j	                  |      }|}t        ||||       P|j                  t              r5|dt        t                }| j	                  |      }|}t        ||||       |j                  t              s|dt        t                }| j	                  |      }|}t        ||||        |s|S |j                         D ]  \  }}	|j	                  |      }
|
|
|	d<     |S )a  Comparing activation values to help debugging accuracy loss due to quantization.

    This functions takes saved activations from the QDQ model and (optionally) the
    float point model, and provides a data structure for comparing:
        * from the qdq model, activation values before and after QDQ operation
        * across both models, activations from the orignal model vs the corresponding
          activations in the QDQ model

    Arg:
        qdq_activations: Output of `collect_activations`. This must be from a quantized
            model with QDQ format.
        float_activations: Output of `collect_activations`. This must be from the float
            point model.

    Returns:
        Dict for comparing pre and post quantized activation tensors. E.g.
        ```
        qdq_cmp = cmp_qdq_input_output(qdq_activations)
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])


        qdq_cmp = cmp_qdq_input_output(qdq_activations, float_activations)
        print(qdq_cmp['activation1']['float'][0])
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])
        ```
    Nfloat)itemsrY   r   lengetro   r   _POST_QDQ_POSTFIX1)rp   rq   rg   rA   r=   pre_namerj   ri   act_name
act_values
float_actss              rE   create_activation_matchingr|      sG   B >@G / 5 5 7W 23"#=c*<&=%=>H.228<%O"7HoGWX!!"78"#@c*?&@%@AH-11(;O&"7HoGWX!!"45"#=c*<&=%=>H-11(;O&"7HoGWX !8"  '*&**84
!",Jw !0
 NrG   weight_tensorweight_scale	weight_zpchannel_axisc                 J   |j                   |j                   k(  sJ |j                  dk(  r| |z
  |z  S |j                  dk(  sJ t        | j                         }d||<   | j                   |   }d }t	        |      D ]  }| j                  ||      }|||   z
  ||   z  }	|dk(  r%t        j                  |	      j                  |      }Mt        j                  |	      j                  |      }
t        j                  ||
f|      } |y |j                  | j                          |S )Nr   r   )
shapesizendimlistrangetaker+   asarrayreshapeconcatenate)r}   r~   r   r   reshape_dimschannel_countdequantized_weightsiper_channel_datadequantized_per_channel_datachannel_weightss              rE   _run_dequantize_linearr      s2    000~~	)\99>>Q++,L!"L!''5M=!(--a>(89Q<(G<XY?'Z$6"'--0L"M"U"UVb"c#mm,HIQQR^_O"'"3"35H/4Z\h"i " " 3 34rG   float_model_pathqdq_model_pathc                    t        t        t        |                   }t        t        t        |                  }i }|j                         }|j	                         D ]v  }|j
                  t        k7  r|j                  d   }t        ||      }|s6|j                  t              st        j                  d| d| d       hd}	|j                  D ]  }
|
j                  dk(  s|
j                  }	  t!        j"                  |      }t!        j"                  t        |j                  d   |            }t%        |j                        dkD  r-t!        j"                  t        |j                  d   |            }n/t'        j(                  |j*                  t&        j,                  	      }|j.                  |j.                  cxk(  rdk(  r5n n2|j1                  t3                     }|j1                  t3                     }|j*                  |j*                  k7  r%t5        d
|j*                   d|j*                         t7        ||||	      }|dt%        t                }|t        j                  d| d| d        t        ||j                               }|st        j                  d|  d| d       Zt!        j"                  |      }||d||<   y |S )a  Comparing weight values to help debugging accuracy loss due to quantization.

    This functions takes the float model and the qdq model, and provides a data structure for comparing
    their corresponding weights to locate quantization errors

    Arg:
        float_model_path: Path points to the float point model.
        qdq_model_path: Path points to the qdq model.

    Returns:
        Dict for comparing weight tensors. E.g.
        ```
        qdq_weight_cmp = create_weight_matching(float_model, qdq_model)
        print(qdq_weight_cmp['activation1']['float'])
        print(qdq_weight_cmp['activation1']['dequantized'])
        ```
    r   zModel Error in 'z': Dequantized tensor name 'z' not recognized!r   axisr      r    z2scale and zero_point must have the same shape but z != )r   Nz': 'z'' per-channel quantization on 0 channelz': weight tensor 'z' not found!)rs   dequantized)r   r   r   r/   nodesop_typer   inputr   rY   r   loggingerror	attributer%   r   r   to_arrayru   r+   zerosr   int32r   r   tuplerV   r   )r   r   float_onnx_modelqdq_onnx_modelmatched_weightsinitializersr4   weight_nameweight_valuesr   attrr}   r~   r   weight_quantfloat_valuesweight_floats                    rE   create_weight_matchingr     s   $ !!<TBR=S!TU:4;OPQN;=O!--/L$$&<<?*::a=$[,?##$<=MM,^,<<XYdXeevwxNNDyyF"vv # %--m<#,,\$**Q--VWtzz?Q$--l4::a=,.WXIL$6$6ekkJI 	3!3'//8L!))%'2I0D\EWEWDXX\]f]l]l\mn  .m\9cgh!"BS)A%B$BCMM,^,<DMtuv#K1A1M1M1OPMM,-=,>>PQ\P]]ijk#,,\:1=l'[$W 'Z rG   xyc                 t   t        | t        j                        r| g}n| }t        |t        j                        r|g}n|}t        |      t        |      k7  rt	        d      t        j
                  |      j                         }t        j
                  |      j                         }t        j                  d      j                  }t        t        j                  j                  |      |      }t        t        j                  j                  ||z
        |      }||z  }	dt        j                  |	      z  S )Nz%Unequal number of tensors to compare!rs      )
isinstancer+   ndarrayru   rV   r   flattenfinfoepsmaxlinalgnormmathlog10)
r   r   xlistylistleftrightepsilontensor_norm	diff_normress
             rE   *compute_signal_to_quantization_noice_ratior   U  s     !U]]#!U]]#
5zSZBCCU#++-De$,,.Ekk'"&&Gell''-w7KELL%%dUl3W=I
	
!C

3rG   weights_matcherr_funcc                 ^    i }| j                         D ]  \  }} ||d   |d         ||<    |S )Nrs   r   rt   )r   r   resultr   weight_matchs        rE   compute_weight_errorr   m  sA      "F%2%8%8%:!\&|G'<l=>YZ{ &;MrG   activations_matchc                     i }| j                         D ]4  \  }}i } ||d   |d         |d<   |d   }|r |||d         |d<   |||<   6 |S )Nrl   rm   qdq_errrs   
xmodel_errr   )r   r   r   r%   match
err_resultfloat_activations          rE   compute_activation_errorr   w  st     +-F(..0e')
 (y)95;L M
9 >'/0@%
BS'TJ|$!t 1 MrG   )NF)NN)N)0__doc__r   r   r)   pathlibr   typingr   r   r   r   r   r	   r+   r2   r
   r   rO   	calibrater   r   
onnx_modelr   quant_utilsr   r   r   r   r   r   r1   ru   rZ   r(   boolrF   r   re   rw   ro   r|   intr   r   rs   r   r   r   rn   rG   rE   <module>r      s  <     B B   %  < !  . 34  48"'	0CI&0S$Y'0 "(3-00  	0
 
0l 37	//'/ "(3-0	/
 
#tEMM"
"#/d +T1 	@#tC%--!8899:	@	@ hu}}56	@ x67		@
 
	@ GK;#x667;S(5==*A%A BC; 
#tC%--001
12;|==05JO--gjemm8DS D# D$sTXY\^c^k^kYkTlOlJm DN Xemm$emm34 9>x?VX]XeXe?e9f 
 4 AkT#u}}"4556u}}5u<= 
#u* 	3	Cc8EMM+B&B!CCD	%--	 (5=="9:EA
 
#tCJ
 rG   