
    g=6                        d dl mZ d dlZd dlmZ d dlZddlmZmZ ddl	m
Z
 ddlmZ 	 	 	 	 	 	 	 	 d	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 dd	Z G d
 d      Z	 	 d	 	 	 	 	 	 	 	 	 ddZy)    )annotationsN)Path   )
FusionGeluFusionLayerNormalization)	ONNXModel   )FusionLpNormalizationc
           	        d}
t        | t        j                        r| nt        j                  |       }t	        |      }t        |      }|j                         rd}
t        |      }|j                         rd}
|rlt        d |j                  D              }|j                  dk  r$t        j                  d|j                   d       nt        |      }|j                         rd}
|s|	r2d}|j                  |      dz   }t        |j                   ||	||	       d}
d
}|j                  |      dz   }|j                   j"                  j$                  D ]Y  }|j&                  dk7  s|j(                  r | |}|dz  }||_        d}
t        j                  d|j&                   d| d       [ |
r,|j+                          t        j,                  |||||||       |
S )a  
    If necessary, this method creates a new "pre-processed" model in preparation for
    quantization of a model to be used in QNN EP. Returns true if a new model was created.

    This method perfoms the following operations:
    - Fuse Erf sequence into a single Gelu node.
    - Fuse ReduceL2 sequence into a single LpNormalization node (p == 2).
    - (Optional) Fuse ReduceMean sequence into a single LayerNormalization node.

    Args:
        model_input: Path to the input model file or ModelProto.
        model_output: Path the output model file, which is only created if this method returns True.
        fuse_layernorm: True if ReduceMean sequences should be fused into LayerNormalization nodes.
            Defaults to False.
        save_as_external_data: True if output model should be saved with external data. Defaults to false.
        all_tensors_to_one_file: Effective only if save_as_external_data is true. Defaults to false.
            If true, save all tensors to one external file specified by external_data_location.
            If false, save each tensor to a file named with the tensor name.
        external_data_location: Effective only if save_as_external_data is true. Defaults to None.
            Specify the external file to which all tensors are saved. Path is relative
            to the model path. If not specified, the model's name is used.
        external_data_size_threshold: Effective only if save_as_external_data is true. Defaults to 1024.
            Tensors with a data size >= external_data_size_threshold are converted to external data.
            To convert every tensor with raw data to external data, set to 0.
        external_data_convert_attribute: Effective only if save_as_external_data is true. Defaults to false.
            If true, convert all tensors to external data.
            If false, convert only non-attribute tensors to external data.
        inputs_to_make_channel_last: List of graph input names to transpose to be "channel-last". For example,
            if "input0" originally has the shape (N, C, D1, D2, ..., Dn), the resulting model will change input0's
            shape to (N, D1, D2, ..., Dn, C) and add a transpose node after it.

            Original:
                input0 (N, C, D1, D2, ..., Dn) --> <Nodes>

            Updated:
                input0 (N, D1, D2, ..., Dn, C) --> Transpose --> input0_chanfirst (N, C, D1, D2, ..., Dn) --> <Nodes>

            This can potentially improve inference latency for QDQ models running on QNN EP because the
            additional transpose node may allow other transpose nodes inserted during ORT layout transformation
            to cancel out.
        outputs_to_make_channel_last: List of graph output names to transpose to be "channel-last". For example,
            if "output0" originally has the shape (N, C, D1, D2, ..., Dn), the resulting model will change output0's
            shape to (N, D1, D2, ..., Dn, C) and add a transpose node before it.

            Original:
                <Nodes> --> output0 (N, C, D1, D2, ..., Dn)

            Updated:
                <Nodes> --> output0_chanfirst (N, C, D1, D2, ..., Dn) --> Transpose --> output0 (N, D1, D2, ..., Dn, C)

            This can potentially improve inference latency for QDQ models running on QNN EP because the
            additional transpose node may allow other transpose nodes inserted during ORT layout transformation
            to cancel out.
    FTc              3  ^   K   | ]%  }|j                   d k(  s|j                   dk(  s"| ' yw) zai.onnxN)domain).0xs     p/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/quantization/execution_providers/qnn/preprocess.py	<genexpr>z'qnn_preprocess_model.<locals>.<genexpr>d   s)     c%7188r>QXXYbMb!%7s   #--   zUnable to fuse ReduceMean sequence into a LayerNormalization node. ONNX model must use an opset >= 17 in order to use LayerNormalization, but found version z9. Please use onnx.version_converter to update your model.Transpose_channel_r	   )transpose_node_name_prefix transpose_node_name_start_suffixqnn_preproc_node_ConstantzNode of type z" does not have a name. Renamed to .)save_as_external_dataall_tensors_to_one_filelocationsize_thresholdconvert_attribute)
isinstanceonnx
ModelProto
load_modelr   r   applyr
   nextopset_importversionloggingwarningr   get_largest_node_name_suffixupdate_io_to_channel_lastmodelgraphnodeop_typenametopological_sort
save_model)model_inputmodel_outputfuse_layernormr   r   external_data_locationexternal_data_size_thresholdexternal_data_convert_attributeinputs_to_make_channel_lastoutputs_to_make_channel_lastmodifiedr+   
onnx_modelfusion_gelufusion_lpnorm
onnx_opsetfusion_layernormtranspose_node_prefixtranspose_node_suffixunnamed_node_prefixavailable_suffixr-   new_node_names                          r   qnn_preprocess_modelrE      s   D H%k4??CKYdIeE5!J Z(K **5M cU%7%7cc
 "OO%%/%7%7$88qs  8
C%%' #&B 4%/%L%LMb%cfg%g!'('<-B	
  .!>>?RSVWW  &&++<<:%dii234D3GHM!%DIHOOmDLL>9[\i[jjklm , ##%"7$;+7=	
 O    c                  (    e Zd Z	 	 	 	 	 	 ddZddZy)InputOutputNameMapc                J    || _         || _        || _        i | _        g | _        y N)orig_tensor_namesorig_graph_inputsorig_graph_outputsupdated_io_namesnew_value_infos)selfrK   rL   rM   s       r   __init__zInputOutputNameMap.__init__   s,     "3!2"4 "!rF   c                H   || j                   v r| j                   |   S | d}d}| j                  D ]T  }|j                  |      s|t        |      d  j	                         s2t        |t        |      d        }t        ||      }V |dz  }| |}| j                  j                  |      xs | j                  |   }t        j                         }|j                  |       ||_        | j                  j                  |       || j                   |<   | j                   |   S )N_channel_first_r	   )rN   rK   
startswithlenisdigitintmaxrL   getrM   r    ValueInfoProtoCopyFromr/   rO   append)	rP   	orig_nameprefixsuffixtensor_nameindexnew_nameorig_value_infovalue_info_protos	            r   get_new_namezInputOutputNameMap.get_new_name   s'   ---((33 #?311K%%f-+c&km2L2T2T2VKF67VU+ 2
 	!XfZ( 0044Y?e4CZCZ[dCe..0!!/2 (##$45+3i($$Y//rF   N)rK   zset[str]rL   dict[str, onnx.ValueInfoProto]rM   rg   )r^   str)__name__
__module____qualname__rQ   rf    rF   r   rH   rH      s(    
"#
" :
" ;	
"0rF   rH   c           	        t        |xs g       }t        |xs g       }|s|sy | j                  }|j                  D ci c]  }|j                  | }}|j                  D ci c]  }|j                  | }	}|D ]  }
|
|vst        |
 d       |D ]  }||	vst        | d       t               }|j                  t        |             |j                  t        |	             |j                  d |j                  D               t        |||	      }|j                  D ]  }t        t        |j                              D ]  }|j                  |   r=|j                  |   |v r,|j                  |j                  |         |j                  |<   O|j                  |   s_|j                  |   |v sq|j                  |j                  |         |j                  |<    t        t        |j                              D ]?  }|j                  |   |v s|j                  |j                  |         |j                  |<   A " |D ]  }||   }|j                  j                  d      r%|j                  j                  j                  d      st        d|j                   d      |j                  j                  j                  }t        |j                         }|dk  rt        d|j                   d	      t"        j$                  j'                         }|j)                  |j                   d
          t        d
|d
z
        D ]0  }|j                   |   j)                  |j                   |d
z             2 |j                   |d
z
     j)                  |       t+        t        |            }t        |      D ]  }|d
k  r|n|d
z
  ||<    |d
z
  |d
<   t"        j,                  j/                  d| ||j                  g|j                  |j                        g|      }|d
z  }|j                  j1                  |g        |D ]  }|	|   }|j                  j                  d      r%|j                  j                  j                  d      st        d|j                   d      |j                  j                  j                  }t        |j                         }|dk  rt        d|j                   d	      t"        j$                  j'                         }|j)                  |j                   d
          t        d
|d
z
        D ]0  }|j                   |   j)                  |j                   |d
z             2 |j                   |d
z
     j)                  |       t+        t        |            }t        |      D ]  }|dk(  r|n|d
z   ||<    d
||d
z
  <   t"        j,                  j/                  d| ||j                  |j                        g|j                  g|      }|d
z  }|j                  j1                  |g        |j2                  j1                  |j4                         y c c}w c c}w )Nz is not a graph inputz is not a graph outputc              3  H   K   | ]  }|j                   D ]	  }|s|   y wrJ   )input)r   r-   
input_names      r   r   z,update_io_to_channel_last.<locals>.<genexpr>   s$     jJDQUQ[Q[:_iZQ[ZJs   "	"tensor_typeshapezExpected input z# to have a tensor_type with a shaper   z to be of rank >= 3r	   	Transpose)r/   inputsoutputspermzExpected output r   )setr,   ro   r/   output
ValueErrorupdater-   rH   rangerV   rf   typeHasFieldrq   rr   dimr    TensorShapeProto	Dimensionr\   listhelper	make_nodeextend
value_inforO   )r+   inputs_to_updateoutputs_to_updater   r   r,   ginputrL   goutputrM   rp   output_namerK   io_mapr-   ig_input_nameg_inputinput_shape
input_rankchannel_dimtranspose_permtranspose_nodeg_output_nameg_outputoutput_shapeoutput_ranks                              r   r*   r*      s    +1r2-34$5KKE;@;;G;f,;G?D||L|G',,/|L '
..
|+@ABB ' )00},BCDD ) S!234S!345jEJJjj   13DFXYF 

s4::'Azz!}A2B!B & 3 3DJJqM B

1A4::a=4E#E & 3 3DJJqM B

1	 ( s4;;'(A{{1~!22!'!4!4T[[^!DA )  )#L1||$$]37<<;S;S;\;\]d;ew||n<_`aall..44)
>w||n<OPQQ++557[__Q/0q*q.)AOOA''A(>? *
Q'00=eJ/0z"A%&UAN1 #&Nq.../0P/STLL>((67 / 
 	)A-(

>*+A )F +%m4}}%%m4HMM<U<U<^<^_f<g/>abcc}}0066,**+?/>QRSS++557\--a01q+/*AQ(()9)9!a%)@A +q)22;?eK01{#A%&!VQN1 $*+{Q'.../0P/ST''67]]O / 
 	)A-(

>*+? +B 
F223K HLs   Y0"Y5)FFFNi   FNN)r2   zstr | Path | onnx.ModelProtor3   z
str | Pathr4   boolr   r   r   r   r5   z
str | Noner6   rX   r7   r   r8   list[str] | Noner9   r   returnr   )r   r   )
r+   zonnx.ModelProtor   r   r   r   r   rh   r   rX   )
__future__r   r'   pathlibr   r    fusionsr   r   r;   r   r=   r
   rE   rH   r*   rl   rF   r   <module>r      s    #    ; # 0 !"'$))-(,,14859D-DD D  	D
 "D 'D #&D &*D "2D #3D 
DN$0 $0V ';,-s4s4&s4 (s4 !$	s4
 '*s4rF   