Ë
    ªgŠ<  ã                   óÊ  — d Z ddlZddlZddlZddlmZ ddlmZmZm	Z	m
Z
mZmZ ddlZddlZddlmZmZ ddlZddlmZmZ ddlmZ dd	lmZmZmZmZmZmZ d
Z ee«      Z 	 	 d.dee!ef   dee!ef   de
ee!      de"ddf
d„Z#	 	 d/de!dede
ee!      dee!e	ejH                     f   fd„Z%edz   Z&dee!ee!eejH                     f   f   de!de
eejH                        de
eejH                        ddf
d„Z'	 d0dee!eejH                     f   de
ee!eejH                     f      dee!ee!eejH                     f   f   fd„Z(dejH                  dejH                  d ejH                  d!e)de
ejH                     f
d"„Z*d#e!d$e!dee!ee!ejH                  f   f   fd%„Z+d&eeejH                     ejH                  f   d'eeejH                     ejH                  f   de,fd(„Z-e-fd)ee!ee!ejH                  f   f   d*eejH                  ejH                  ge,f   dee!e,f   fd+„Z.e-fd,ee!ee!eejH                     f   f   d*eeejH                     eejH                     ge,f   dee!ee!e,f   f   fd-„Z/y)1aÇ  Utilities to run a given ONNX model, while saving input/output tensors of
eligible operator nodes.

A use case is to debug quantization induced accuracy drop. An AI engineer can
run the original float32 model and the quantized model with the same inputs,
then compare the corresponding activations between the two models to find
where the divergence is.

Example Usage:

```python
    class ExampleDataReader(CalibrationDataReader):
        def __init__(self):
            ...
        def get_next(self):
            ...

    input_data_reader = ExampleDataReader()

    augmented_model_path = str(Path(self._tmp_model_dir.name).joinpath("augmented_model.onnx"))
    modify_model_output_intermediate_tensors (path_to_onnx_model, augmented_model_path)

    tensor_dict = collect_activations(augmented_model_path, input_data_reader)
```

`tensor_dict` points to a dictionary where the keys are tensor names and each value
is a list of tensors, one from each model run

é    N)ÚPath)ÚCallableÚDictÚListÚOptionalÚSequenceÚUnion)ÚhelperÚnumpy_helperé   )ÚCalibraterBaseÚCalibrationDataReader)Ú	ONNXModel)ÚDEQUANT_OP_NAMEÚDEQUANT_OUTPUT_SUFFIXÚQUANT_INPUT_SUFFIXÚTENSOR_NAME_QUANT_SUFFIXÚfind_by_nameÚload_model_with_shape_inferÚ_ReshapedSavedOutputÚinput_model_pathÚoutput_model_pathÚop_types_for_savingÚsave_as_external_dataÚreturnc                 ó  — |€g }t        | |¬«      }|j                  }|j                  |«      \  }}dt        t	        j                  «       «      z   }t        j                  t        j                  dgt        j                  ¬«      |«      }	|j                  j                  j                  |	«       |D ]´  }
|
t        z   }t        j                  j!                  d|
|g|g|¬«      }|j                  j"                  j                  |«       t        j$                  |||
   j&                  j(                  j*                  dg«      }|j                  j,                  j                  |«       Œ¶ t        j.                  |||¬«       y)	aà  Augment a given ONNX model to save node input/output tensors.

    Add all input/output tensors of operator nodes to model outputs
    so that their values can be retrieved for debugging purposes.

    Args:
        input_model: the path to load the model.
        op_types_for_saving: Operator types for which the
                input/output should be saved. By default, saving all the
                float32/float16 tensors.

    Returns:
        The augmented ONNX model
    N)Úop_types_to_calibrateÚLinearReshape_éÿÿÿÿ©ÚdtypeÚReshape)ÚinputsÚoutputsÚname)r   )r   ÚmodelÚselect_tensors_to_calibrateÚstrÚtimer   Ú
from_arrayÚnumpyÚarrayÚint64ÚgraphÚinitializerÚappendÚ_TENSOR_SAVE_POSTFIXÚonnxr
   Ú	make_nodeÚnodeÚmake_tensor_value_infoÚtypeÚtensor_typeÚ	elem_typeÚoutputÚsave)r   r   r   r   ÚsaverÚmodel_to_augmentÚtensorsÚvalue_infosÚreshape_shape_nameÚreshape_shapeÚtensor_nameÚreshape_outputÚreshape_nodeÚreshape_output_value_infos                 ú\/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/qdq_loss_debug.pyÚ(modify_model_output_intermediate_tensorsrF   @   sX  € ð* Ð"Ø ÐÜÐ+ÐCVÔW€EØ—{‘{ÐØ ×<Ñ<Ð=MÓNÑ€Gˆ[Ø)¬C´·	±	³Ó,<Ñ<ÐÜ ×+Ñ+¬E¯K©K¸¸ÄEÇKÁKÔ,PÐRdÓe€MØ×Ñ×&Ñ&×-Ñ-¨mÔ<ãˆØ$Ô';Ñ;ˆÜ—{‘{×,Ñ,ØØÐ!3Ð4Ø#Ð$Øð	 -ó 
ˆð 	×Ñ×#Ñ#×*Ñ*¨<Ô8Ü$*×$AÑ$AØ˜K¨Ñ4×9Ñ9×EÑE×OÑOÐRTÐQUó%
Ð!ð 	×Ñ×%Ñ%×,Ñ,Ð-FÕGð ô 	‡IIØØØ3öó    Úaugmented_modelÚinput_readerÚexecution_providersc                 ó  — |€3t        j                  «       }t         j                  j                  |_        |€dg}t        j
                  | ||¬«      }g }|D ]#  }|j                  |j                  d|«      «       Œ% |st        d«      ‚i }|j                  «       }|D ]k  }	t        ||	«      D ]Z  \  }
}|
j                  j                  t        «      sŒ&|
j                  dt          }|j                  |g «      j                  |«       Œ\ Œm |S )a´  Run augmented model and collect activations tensors.

    Args:
        augmented_model: Path to augmented model created by modify_model_output_intermediate_tensors ()
        input_reader: Logic for reading input for the model, augmented model have the same
            input with the original model.
        session_options: Optional OnnxRuntime session options for controlling model run.
            By default graph optimization is turned off
        execution_providers: Collection of execution providers for running the model.
            Only CPU EP is used by default.

    Returns:
        A dictionary where the key is tensor name and values are list of tensors from each batch
    NÚCPUExecutionProvider)Úsess_optionsÚ	providersz3No data is collected while running augmented model!)ÚonnxruntimeÚSessionOptionsÚGraphOptimizationLevelÚORT_DISABLE_ALLÚgraph_optimization_levelÚInferenceSessionr0   ÚrunÚRuntimeErrorÚget_outputsÚzipr%   Úendswithr1   Ú_TENSOR_SAVE_POSTFIX_LENÚ
setdefault)rH   rI   Úsession_optionsrJ   Úinference_sessionÚintermediate_outputsÚinput_dÚoutput_dictÚoutput_infoÚbatchr9   Úoutput_dataÚoutput_names                rE   Úcollect_activationsre   s   s  € ð* ÐÜ%×4Ñ4Ó6ˆÜ3>×3UÑ3U×3eÑ3eˆÔ0ØÐ"Ø5Ð6Ðä#×4Ñ4ØØ$Ø%ôÐð ÐÛˆØ×#Ñ#Ð$5×$9Ñ$9¸$ÀÓ$HÕIð  áÜÐPÓQÐQà€KØ#×/Ñ/Ó1€KÛ%ˆÜ#& {°EÖ#:ÑˆFKØ{‰{×#Ñ#Ô$8Õ9Ø$Ÿk™kÐ*DÔ,DÐ+DÐEØ×&Ñ& {°BÓ7×>Ñ>¸{ÕKñ $;ð &ð ÐrG   Ú_1Úqdq_cmpÚactivation_nameÚpre_qdq_tensorsÚpost_qdq_tensorsc                 ó:   — ||i | |<   || |   d<   || |   d<   y y y )NÚpre_qdqÚpost_qdq© )rg   rh   ri   rj   s       rE   Ú_add_pre_post_qdq_pairro   ¨   s@   € ð Ð#¨Ð(CØ#%ˆÑ Ø.=ˆÑ  Ñ+Ø/?ˆÑ  Ò,ð )DÐ#rG   Úqdq_activationsÚfloat_activationsc                 ó`  — i }| j                  «       D ]ã  \  }}|j                  t        «      r5|dt        t        «        }| j	                  |«      }|}t        ||||«       ŒP|j                  t        «      r5|dt        t        «        }| j	                  |«      }|}t        ||||«       Œš|j                  t        «      sŒ°|dt        t        «        }| j	                  |«      }|}t        ||||«       Œå |s|S |j                  «       D ]  \  }}	|j	                  |«      }
|
€Œ|
|	d<   Œ  |S )a©  Comparing activation values to help debugging accuracy loss due to quantization.

    This functions takes saved activations from the QDQ model and (optionally) the
    float point model, and provides a data structure for comparing:
        * from the qdq model, activation values before and after QDQ operation
        * across both models, activations from the orignal model vs the corresponding
          activations in the QDQ model

    Arg:
        qdq_activations: Output of `collect_activations`. This must be from a quantized
            model with QDQ format.
        float_activations: Output of `collect_activations`. This must be from the float
            point model.

    Returns:
        Dict for comparing pre and post quantized activation tensors. E.g.
        ```
        qdq_cmp = cmp_qdq_input_output(qdq_activations)
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])


        qdq_cmp = cmp_qdq_input_output(qdq_activations, float_activations)
        print(qdq_cmp['activation1']['float'][0])
        print(qdq_cmp['activation1']['pre_qdq'][0])
        print(qdq_cmp['activation1'][`post_qdq'][0])
        ```
    NÚfloat)ÚitemsrY   r   ÚlenÚgetro   r   Ú_POST_QDQ_POSTFIX1)rp   rq   rg   rA   r=   Úpre_namerj   ri   Úact_nameÚ
act_valuesÚ
float_actss              rE   Úcreate_activation_matchingr|   ´   sG  € ðB >@€GØ /× 5Ñ 5Ö 7ÑˆWØ×ÑÔ 2Ô3Ø"Ð#=¤cÔ*<Ó&=Ð%=Ð>ˆHØ.×2Ñ2°8Ó<ÐØ%ˆOÜ" 7¨H°oÐGWÕXØ×!Ñ!Ô"7Ô8Ø"Ð#@¤cÔ*?Ó&@Ð%@ÐAˆHØ-×1Ñ1°(Ó;ˆOØ&ÐÜ" 7¨H°oÐGWÕXØ×!Ñ!Ô"4Õ5Ø"Ð#=¤cÔ*<Ó&=Ð%=Ð>ˆHØ-×1Ñ1°(Ó;ˆOØ&ÐÜ" 7¨H°oÐGWÕXð !8ñ" Øˆà '§¡¦Ñˆ*Ø&×*Ñ*¨8Ó4ˆ
ØÑ!Ø",ˆJwÒð !0ð
 €NrG   Úweight_tensorÚweight_scaleÚ	weight_zpÚchannel_axisc                 óJ  — |j                   |j                   k(  sJ ‚|j                  dk(  r| |z
  |z  S |j                  dk(  sJ ‚t        | j                   «      }d||<   | j                   |   }d }t	        |«      D ]ˆ  }| j                  ||«      }|||   z
  ||   z  }	|dk(  r%t        j                  |	«      j                  |«      }ŒMt        j                  |	«      j                  |«      }
t        j                  ||
f|«      }ŒŠ |€y |j                  | j                   «       |S )Nr   r   )
ÚshapeÚsizeÚndimÚlistÚrangeÚtaker+   ÚasarrayÚreshapeÚconcatenate)r}   r~   r   r€   Úreshape_dimsÚchannel_countÚdequantized_weightsÚiÚper_channel_dataÚdequantized_per_channel_dataÚchannel_weightss              rE   Ú_run_dequantize_linearr’   ò   s2  € ð ×Ñ §¡Ò0Ð0Ð0Ø‡~~˜ÒØ 	Ñ)¨\Ñ9Ð9à>‰>˜QÒÐÐÜ˜×+Ñ+Ó,€LØ!"€LÑØ!×'Ñ'¨Ñ5€MØÐÜ=Ö!ˆØ(×-Ñ-¨a°Ó>ÐØ(8¸9ÀQ¹<Ñ(GÈ<ÐXYÉ?Ñ'ZÐ$ØŠ6Ü"'§-¡-Ð0LÓ"M×"UÑ"UÐVbÓ"cÑä#Ÿm™mÐ,HÓI×QÑQÐR^Ó_ˆOÜ"'×"3Ñ"3Ð5HÈ/Ð4ZÐ\hÓ"iÑð "ð Ð"Øà×Ñ × 3Ñ 3Ô4ØÐrG   Úfloat_model_pathÚqdq_model_pathc                 ó²  — t        t        t        | «      «      «      }t        t        t        |«      «      «      }i }|j                  «       }|j	                  «       D ]v  }|j
                  t        k7  rŒ|j                  d   }t        ||«      }|sŒ6|j                  t        «      st        j                  d|› d|› d«       Œhd}	|j                  D ]  }
|
j                  dk(  sŒ|
j                  }	Œ  t!        j"                  |«      }t!        j"                  t        |j                  d   |«      «      }t%        |j                  «      dkD  r-t!        j"                  t        |j                  d   |«      «      }n/t'        j(                  |j*                  t&        j,                  ¬	«      }|j.                  |j.                  cxk(  rdk(  r5n n2|j1                  t3        «       «      }|j1                  t3        «       «      }|j*                  |j*                  k7  r%t5        d
|j*                  › d|j*                  › «      ‚t7        ||||	¬«      }|dt%        t        «        }|€t        j                  d|› d|› d«       Œ t        ||j                  «       «      }|st        j                  d| › d|› d«       ŒZt!        j"                  |«      }||dœ||<   Œy |S )aˆ  Comparing weight values to help debugging accuracy loss due to quantization.

    This functions takes the float model and the qdq model, and provides a data structure for comparing
    their corresponding weights to locate quantization errors

    Arg:
        float_model_path: Path points to the float point model.
        qdq_model_path: Path points to the qdq model.

    Returns:
        Dict for comparing weight tensors. E.g.
        ```
        qdq_weight_cmp = create_weight_matching(float_model, qdq_model)
        print(qdq_weight_cmp['activation1']['float'])
        print(qdq_weight_cmp['activation1']['dequantized'])
        ```
    r   zModel Error in 'z': Dequantized tensor name 'z' not recognized!r   Úaxisr   é   r    z2scale and zero_point must have the same shape but z != )r€   Nz': 'z'' per-channel quantization on 0 channelz': weight tensor 'z' not found!)rs   Údequantized)r   r   r   r/   ÚnodesÚop_typer   Úinputr   rY   r   ÚloggingÚerrorÚ	attributer%   rŽ   r   Úto_arrayru   r+   Úzerosr‚   Úint32rƒ   r‰   ÚtuplerV   r’   )r“   r”   Úfloat_onnx_modelÚqdq_onnx_modelÚmatched_weightsÚinitializersr4   Úweight_nameÚweight_valuesr–   Úattrr}   r~   r   Úweight_quantÚfloat_valuesÚweight_floats                    rE   Úcreate_weight_matchingr­     sŒ  € ô$ !Ô!<¼TÐBRÓ=SÓ!TÓUÐÜÔ:¼4ÀÓ;OÓPÓQ€Nà;=€OØ!×-Ñ-Ó/€LØ×$Ñ$×&ˆØ<‰<œ?Ò*ØØŸ:™: a™=ˆÜ$ [°,Ó?ˆÙØØ×#Ñ#Ô$<Ô=ÜM‰MÐ,¨^Ð,<Ð<XÐYdÐXeÐevÐwÔxØàˆØ—N”NˆDØy‰y˜FÓ"Ø—v‘v‘ð #ô %×-Ñ-¨mÓ<ˆÜ#×,Ñ,¬\¸$¿*¹*ÀQ¹-ÈÓ-VÓWˆÜˆtz‰z‹?˜QÒÜ$×-Ñ-¬l¸4¿:¹:Àa¹=È,Ó.WÓX‰IäŸ™ L×$6Ñ$6¼e¿k¹kÔJˆIð ×Ñ 	§¡Ô3°!Õ3à'×/Ñ/´³Ó8ˆLØ!×)Ñ)¬%«'Ó2ˆIØ×Ñ §¡Ò0ÜØDÀ\×EWÑEWÐDXÐX\Ð]f×]lÑ]lÐ\mÐnóð ô .¨m¸\È9ÐcgÔhˆØ!Ð"B¤SÔ)AÓ%BÐ$BÐCˆØÐÜM‰MÐ,¨^Ð,<¸DÀÀÐMtÐuÔvÙä# KÐ1A×1MÑ1MÓ1OÓPˆÙÜM‰MÐ,Ð-=Ð,>Ð>PÐQ\ÐP]Ð]iÐjÔkÙÜ#×,Ñ,¨\Ó:ˆØ1=ÈlÑ'[ˆ˜Ó$ðW 'ðZ ÐrG   ÚxÚyc                 ót  — t        | t        j                  «      r| g}n| }t        |t        j                  «      r|g}n|}t        |«      t        |«      k7  rt	        d«      ‚t        j
                  |«      j                  «       }t        j
                  |«      j                  «       }t        j                  d«      j                  }t        t        j                  j                  |«      |«      }t        t        j                  j                  ||z
  «      |«      }||z  }	dt        j                  |	«      z  S )Nz%Unequal number of tensors to compare!rs   é   )Ú
isinstancer+   Úndarrayru   rV   rŠ   ÚflattenÚfinfoÚepsÚmaxÚlinalgÚnormÚmathÚlog10)
r®   r¯   ÚxlistÚylistÚleftÚrightÚepsilonÚtensor_normÚ	diff_normÚress
             rE   Ú*compute_signal_to_quantization_noice_ratiorÄ   U  sñ   € ô !”U—]‘]Ô#Ø‰àˆÜ!”U—]‘]Ô#Ø‰àˆÜ
ˆ5ƒz”S˜“ZÒÜÐBÓCÐCä×Ñ˜UÓ#×+Ñ+Ó-€DÜ×Ñ˜eÓ$×,Ñ,Ó.€Eäk‰k˜'Ó"×&Ñ&€GÜ”e—l‘l×'Ñ'¨Ó-¨wÓ7€KÜ”E—L‘L×%Ñ% d¨U¡lÓ3°WÓ=€IØ
˜	Ñ
!€CØ”—
‘
˜3“ÑÐrG   Úweights_matchÚerr_funcc                 ó^   — i }| j                  «       D ]  \  }} ||d   |d   «      ||<   Œ |S )Nrs   r˜   ©rt   )rÅ   rÆ   Úresultr§   Úweight_matchs        rE   Úcompute_weight_errorrË   m  sA   € ð  "€FØ%2×%8Ñ%8Ö%:Ñ!ˆ\Ù& |°GÑ'<¸lÈ=Ñ>YÓZˆˆ{Òð &;à€MrG   Úactivations_matchc                 ó˜   — i }| j                  «       D ]4  \  }}i } ||d   |d   «      |d<   |d   }|r |||d   «      |d<   |||<   Œ6 |S )Nrl   rm   Úqdq_errrs   Ú
xmodel_errrÈ   )rÌ   rÆ   rÉ   r%   ÚmatchÚ
err_resultÚfloat_activations          rE   Úcompute_activation_errorrÓ   w  st   € ð +-€FØ(×.Ñ.Ö0‰ˆˆeØ')ˆ
Ù (¨¨yÑ)9¸5ÀÑ;LÓ Mˆ
9ÑØ  ™>ÐÙÙ'/Ð0@À%È
ÑBSÓ'TˆJ|Ñ$Ø!ˆˆtŠð 1ð €MrG   )NF)NN)N)0Ú__doc__rœ   rº   r)   Úpathlibr   Útypingr   r   r   r   r   r	   r+   r2   r
   r   rO   Ú	calibrater   r   Ú
onnx_modelr   Úquant_utilsr   r   r   r   r   r   r1   ru   rZ   r(   ÚboolrF   r³   re   rw   ro   r|   Úintr’   r­   rs   rÄ   rË   rÓ   rn   rG   rE   Ú<module>rÜ      sƒ  ðñó< Û Û Ý ß B× Bã Û ß %ã ç <Ý !÷÷ ð .Ð ÙÐ3Ó4Ð ð 48Ø"'ñ	0Ø˜C ˜IÑ&ð0à˜S $˜YÑ'ð0ð " (¨3¡-Ñ0ð0ð  ð	0ð
 
ó0ðl Ø37ñ	/Øð/à'ð/ð " (¨3¡-Ñ0ð	/ð
 
ˆ#ˆtE—M‘MÑ"Ð
"Ñ#ó/ðd +¨TÑ1Ð ð	@Ø#t˜C ¨%¯-©-Ñ!8Ð8Ñ9Ð9Ñ:ð	@àð	@ð ˜h u§}¡}Ñ5Ñ6ð	@ð ˜x¨¯©Ñ6Ñ7ð		@ð
 
ó	@ð GKñ;Ø˜#˜x¨¯©Ñ6Ð6Ñ7ð;à  S¨(°5·=±=Ñ*AÐ%AÑ BÑCð;ð 
ˆ#ˆtC˜ %§-¡-Ñ0Ð0Ñ1Ð
1Ñ2ó;ð|Ø—=‘=ðØ05·±ðØJOÏ-É-ðØgjðàˆem‰mÑóð8D¨Sð DÀ#ð DÈ$ÈsÐTXÐY\Ð^c×^kÑ^kÐYkÑTlÐOlÑJmó DðN ØˆXe—m‘mÑ$ e§m¡mÐ3Ñ4ð Ø9>¸xÈÏÉÑ?VÐX]×XeÑXeÐ?eÑ9fð à
ó ð4 AkñØ˜˜T # u§}¡}Ð"4Ñ5Ð5Ñ6ðà˜Ÿ™ u§}¡}Ð5°uÐ<Ñ=ðð 
ˆ#ˆuˆ*Ñóð 	3ñ	Ø˜C  c¨8°E·M±MÑ+BÐ&BÑ!CÐCÑDðàØ	%—-‘-Ñ	  (¨5¯=©=Ñ"9Ð:¸EÐAñðð
 
ˆ#ˆtC˜JÑÐ
Ñ ôrG   