
    gS                        d Z ddlZddlZddlZddlZddlmZ ddlmZmZm	Z	 ddl
Z
ddlmZ ddlmZ ddlmZmZmZ ddlmZmZmZ dd	lmZ  G d
 d      Zd"dedefdZd"dedefdZ G d d      ZdefdZdefdZ	 d"dej>                  de	ej@                     fdZ!d Z"d Z#e$dk(  r e#       Z% e&de%        e%jN                  e%jP                  dk(  rdnde%_'        e%jR                  r>e
jT                  jW                         sJ e%jX                  dk(  r/d  e       v sJ d!e%_-        ne%j\                  rJ e%jZ                  rJ e%j\                  se%jZ                  r	 e!e%       y e"e%       yy)#z]
Benchmark performance of SAM2 encoder with ORT or PyTorch. See benchmark_sam2.sh for usage.
    N)datetime)ListMappingOptional)SAM2ImageDecoder)SAM2ImageEncoder)decoder_shape_dictencoder_shape_dictload_sam2_model)InferenceSessionSessionOptionsget_available_providers)CudaSessionc            )           e Zd Zddddddddddddej                  ddddd	dfd
edededej                  dedededededededededededededededef(dZ	d Z
d eeee   f   fd!Zd eeej                  f   fd"Zy#)$
TestConfigimage_encoderCPUExecutionProvidermax-autotune      FT     
model_type	onnx_pathsam2_dirdevice	component
batch_sizeheightwidth
num_labels
num_points	num_masksmulti_mask_outputuse_tf32enable_cuda_graphprefer_nhwcwarm_upenable_nvtx_profileenable_torch_profilerepeatsverbosec                    |dv sJ |	dk\  r|	dk  sJ |
dk\  r|
dk  sJ || _         || _        || _        || _        || _        || _        || _        |	| _        |
| _        || _	        || _
        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        || _        | j                  dk(  r&| j                  dk(  r| j                  dk(  sJ d       y y )Nsam2_hiera_tinysam2_hiera_smallsam2_hiera_largesam2_hiera_base_plus   i   r   r   z7Only image size 1024x1024 is allowed for image encoder.)r   r   r   r   providertorch_compile_moder   r   r    r!   r"   r#   r$   r   r%   r&   dtyper'   r(   r)   r*   r+   r,   )selfr   r   r   r   r   r4   r5   r   r   r    r!   r"   r#   r$   r%   r&   r6   r'   r(   r)   r*   r+   r,   s                           h/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/sam2/benchmark_sam2.py__init__zTestConfig.__init__   s   4 pppp}4//|--$" " "4$
$$"!2 !2
&#6 $8!>>_,;;$&4::+=x?xx=+= -    c                     t        |        S N)varsr7   s    r8   __repr__zTestConfig.__repr__T   s    t*r:   returnc                     | j                   dk(  r+t        | j                  | j                  | j                        S t        | j                  | j                  | j                  | j                  | j                        S )Nr   )	r   r
   r   r   r    r	   r!   r"   r#   r>   s    r8   
shape_dictzTestConfig.shape_dictW   sV    >>_,%doot{{DJJOO%dkk4::tPTP_P_aeaoaoppr:   c                    | j                   }| j                  dk(  rEdt        j                  | j                  d| j
                  | j                  || j                        iS t        j                  dddd|| j                        t        j                  ddd	d	|| j                        t        j                  dddd|| j                        t        j                  d
d| j                  | j                  df|| j                        t        j                  d
d| j                  | j                  ft        j                  | j                        t        j                  | j                  ddd|| j                        t        j                  | j                  || j                        t        j                  | j
                  | j                  gt        j                  | j                        dS )Nr   image   )r6   r   r          @      r   r      )image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_size)r6   r   torchrandnr   r   r    r   randrandintr!   r"   int32zerosonestensor)r7   r6   s     r8   random_inputszTestConfig.random_inputs]   ss   

>>_,U[[!T[[$**\ajnjujuvww %*JJq"c3eTXT_T_$`$)JJq"c3eTXT_T_$`$)JJq#r2USWS^S^$_ %tdootB%X\XcXc! !&q4??DOO<EKKX\XcXc!  %{{4??AsCu]a]h]hi#(::dooUSWS^S^#_',||T[[$**4MUZU`U`imitit'u r:   N)__name__
__module____qualname__rS   float32strr   intboolr9   r?   r   r   rB   Tensorr[    r:   r8   r   r      s`    )')"'"'mm!$)%*17y7y 7y 	7y
 7y 7y 7y 7y 7y 7y 7y 7y  7y  !7y"  #7y& '7y( )7y* "+7y, #-7y. /7y0 17yrqGCcN3 qwsELL'89 r:   r   configr@   c                    | j                   rt        dt        |               | j                  dk(  rt	        | j
                  t              rt        j                  j                         n| j
                  j                  }t        j                  || j                        }t        | j                        |d<   | j                   rd|d<   | j                  |fdg}ndg}t#        | j$                  ||      }|S )Nzcreate session for CUDAExecutionProviderr%   r   r'   r   )	providers)r,   printr=   r4   
isinstancer   r`   rS   cudacurrent_deviceindexr   get_cuda_provider_optionsr&   ra   r%   r'   r   r   )re   session_options	device_idprovider_optionsrh   ort_sessions         r8   create_ort_sessionrs   r   s    ~~#DL>23113=fmmS3QEJJ--/W]WdWdWjWj	&@@FLdLde'*6??';$./]+oo'78:PQ	+,	"6#3#3_PYZKr:   c                     t        | |      }t        || j                  | j                        }|j	                  | j                                |S r<   )rs   r   r   r&   allocate_buffersrB   )re   ro   rr   cuda_sessions       r8   create_sessionrw      sC    $V_=K{FMM6;S;STL!!&"3"3"56r:   c                   $    e Zd ZdZddefdZd Zy)OrtTestSessionz;A wrapper of ORT session to test relevance and performance.Nre   c                 P    t        ||      | _        |j                         | _        y r<   )rw   rr   r[   	feed_dict)r7   re   ro   s      r8   r9   zOrtTestSession.__init__   s!    )&/B--/r:   c                 L    | j                   j                  | j                        S r<   )rr   inferr{   r>   s    r8   r}   zOrtTestSession.infer   s    %%dnn55r:   r<   )r\   r]   r^   __doc__r   r9   r}   rd   r:   r8   ry   ry      s    E0z 06r:   ry   rv   c                 ~    t        j                          }| j                  |      }t        j                          }||z
  S r<   )timer}   )rv   
input_dictstart_ends        r8   measure_latencyr      s2    IIKE:&A
))+C;r:   c                    | j                   j                  }|dk(  }|rt        j                  j	                  d      j
                  dk\  rT| j                  rHdt        j                  j                  j                  _	        dt        j                  j                  _	        |xr | j                  t        j                  k7  }| j                         }t        j                         5  t        j                  || j                  |      5  t!        | j"                  | j$                  | j                         }| j&                  dk(  r|rU| j(                  dk7  rFt        j*                  |j,                  j.                  | j(                  dd	
      |j,                  _        | j1                         d   }t        j2                  |      j5                  | j                   | j                        }t7        |      }|r(| j(                  dk7  rt9        d| j(                   d       t;        | j<                        D ]  }	 ||      \  }
}} |re| j>                  rYdd l }ddlm!} |jE                          t9        d       |jG                  d      5   ||d       d d d        |jI                          |r| jJ                  rt        jL                  jO                  t        jL                  jP                  jR                  t        jL                  jP                  jT                  gd      5 }t9        d       t        jL                  jW                  d      5   ||       d d d        d d d        t9        jY                         j[                  dd             |j]                  d       | j^                  dk(  r	 d d d        d d d        y t9        d| j^                   d       ta        j`                         }t;        | j^                        D ]/  }	 ||      \  }
}}|st        j                  jc                          1 n|d   |d   |d   |d   |d    |d!   |d"   |d#   f}te        || jf                  $      }|rA| j(                  dk7  r2t        j*                  |j.                  | j(                  dd	
      |_        t;        | j<                        D ]  }	 || \  }}} |rc| j>                  rWdd l }ddlm!} |jE                          t9        d%       |jG                  d      5   ||d&di d d d        |jI                          |r| jJ                  rt        jL                  jO                  t        jL                  jP                  jR                  t        jL                  jP                  jT                  gd      5 }t9        d'       t        jL                  jW                  d(      5   ||  d d d        d d d        t9        jY                         j[                  dd             |j]                  d)       | j^                  dk(  r	 d d d        d d d        y t9        d| j^                   d       ta        j`                         }t;        | j^                        D ],  }	 || \  }}}|st        j                  jc                          . ta        j`                         }||z
  | j^                  z  cd d d        cd d d        S # 1 sw Y   dxY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   xY w# 1 sw Y   KxY w# 1 sw Y   PxY w# 1 sw Y   nxY wd d d        y # 1 sw Y   y xY w)*Nrk   r      T)device_typer6   enabled)r   r   noneF)mode	fullgraphdynamicrD   )r   r6   zBRunning warm up. It will take a while since torch compile mode is .cudartz#Start nvtx profiling on encoder ...one_run)r)   )
activitiesrecord_shapesz$Start torch profiling on encoder ...encodercuda_time_total
   )sort_by	row_limitztorch_image_encoder.jsonzStart z runs of performance tests...rK   rL   rM   rN   rO   rP   rQ   rR   )multimask_outputz"Start nvtx profiling on decoder...r)   z$Start torch profiling on decoder ...decoderztorch_image_decoder.json)4r   typerS   rk   get_device_propertiesmajorr%   backendsmatmul
allow_tf32cudnnr6   r_   r[   inference_modeautocastr   r   r   r   r5   compiler   forwardrB   rT   tor   ri   ranger(   r)   nvtxr   cudaProfilerStartannotatecudaProfilerStopr*   profilerprofileProfilerActivityCPUCUDArecord_functionkey_averagestableexport_chrome_tracer+   r   synchronizer   r$   )re   r   is_cudaenabled_auto_cast
ort_inputs
sam2_modelimage_shapeimgsam2_encoderr   _image_features_0_image_features_1_image_embeddingsr   r   profr   torch_inputssam2_decoder_masks_iou_predictions_low_res_masksr   s                          r8   	run_torchr      s   --$$KV#G 5::33A6<<Afoo04""-*.'AFLLEMM$A%%'J				Kv||ev!w$V__f6G6GPVP]P]^
.644>38==,,4422"!	4
((0 !++-g6K++k*--V]]&,,-WC+J7L644>Z[a[t[tZuuvwx6>>*JVWZJ[G!#46G + 655'((*;<]]9- $? .'')666^^++ % ? ? C CU^^EdEdEiEij"& ,  @A77	B$S) C d'')//8IUW/XY(()CD~~"W "x		Z F6>>**GHIIIKE6>>*JVWZJ[G!#46GJJ**, + -.-.-.>*>*=),-01	L ,!'!9!9L
 644>',}} ((22"!	($ 6>>*;G;V8(. + 655'((*:;]]9- ,IDI .'')666^^++ % ? ? C CU^^EdEdEiEij"& ,  @A77	B$l3 C d'')//8IUW/XY(()CD~~"M "x		P F6>>**GHIIIKE6>>*;G;V8(.JJ**, +
 iikev~~-a "x!w		6 .- CB l .- CB w "x!w			s   ##]F ][3B]+\		\ 	\A]-]>A]D]	\ B]-+\4\'	\4&A]9]
A]A] 	]3[=8] \
\\]\$]'\1,\44\>9]]
	]]args
csv_writerc                 0	   | j                   }| j                  }| j                  }|r7t        j                  j                         }t        j                  d|      }d}nd}t        j                  d      }d}d}t        j                  t        j                  t        j                  d}t        d*i d| j                  d	| j                  d
| j                  d| j                  d|d| j                  d| j                   d| j"                  d|ddd|d|| j$                     d| j&                  d| j                  d| j(                  d| j*                  d| j,                  d| j.                  dd}	| j0                  dk(  rGt3               }
| j4                  |
_        |	j*                  rd|
_        d|
_        d|
_        t=        |	|
      }|	j?                         }	 tA        |	j(                        D ]  }tC        ||      } 	 |	j*                  rodd l$}dd lm%} |jM                          |jO                  d!      5  |jQ                  |      }d d d        |jS                          |jT                  jW                          |dk(  ry g }tA        |      D ]  }tC        ||      }|jY                  |       ! t[        j\                  |      }~n0t        j^                         5  	 ta        |	      }	 d d d        |dk(  ry | j0                  d"z   |rdndz   }i d| j                  d| j                  d| j$                  d#|d|d|	j&                  d|	jb                  d| j                  d| j                   d| j"                  d$| jd                  d%|	jf                  d&|	jh                  d'|	jj                  d(| j4                  d|	j(                  d|| j*                  | j.                  |d)}||jm                  |       tG        to        |	              tG        |        y # tD        $ r}tG        d|	d|        Y d }~y d }~ww xY w# 1 sw Y   xY w# tD        $ r#}tG        d|	d|        Y d }~d d d        y d }~ww xY w# 1 sw Y   xY w)+Nrk   rg   r   cpuFr   fp32fp16bf16r   r   r   r   r4   r   r   r    r   r%   Tr&   r6   r'   r+   r(   r)   r*   r5   r,   ort   zFailed to run config=z. Exception: r   r   :use_gpur$   r!   r"   r#   intra_op_num_threads)r)   r5   engineaverage_latencyrd   )8r   use_cuda_graphr+   rS   rk   rl   r   r_   float16bfloat16r   r   r   r   r   r   r   r    r6   r'   r(   r)   r*   r5   r   r   r   enable_profilinglog_severity_levellog_verbosity_levelrw   r[   r   r   	Exceptionri   r   r   r   r   r}   r   rr   end_profilingappend
statisticsmeanno_gradr   r%   r   r!   r"   r#   writerowr=   )r   r   r   r&   r+   rp   r   r4   dtypesre   sess_optionssessionr   r   er   r   latency_listlatencyr   r   rows                         r8   run_testr     s    LLG"11<<GJJ--/	fi0*	e$!)mmU]]ENNSF ??..  ..	
  ?? {{ jj   , TZZ  $$    !44!" "66#$  22%& 'F, {{e%',0,E,E)%%,0L)./L+/0L, 6))+
	6>>*#GZ8 + %%#$$&y)MM*- *##%--/a<wA%gz:G(   %//,7]]_"+F"3  a<[[3G&?FdooT^^ 	 	7	
 	. 	v)) 	FOO 	doo 	$++ 	 	T22 	f'' 	f'' 	V%% 	 9 9  	6>>!" 	7#$  $77"55*+C0 C 	T&\N	SEOA  	*6)=<=	 *)&  .vi}QC@A _ _sT   #&P( Q%R'Q(	Q1QQQ	R%R6RRRRc                 T   | j                   rdnd}dj                  || j                  t        j                         j                  d            }t        |dd      5 }g d}t        j                  ||	      }|j                          t        | |       d d d        y # 1 sw Y   y xY w)
Ngpur   zbenchmark_sam_{}_{}_{}.csvz%Y%m%d-%H%M%Sa )r   newline)r   r   r6   r   r&   r'   r%   r   r   r    r$   r!   r"   r#   r   r(   r+   r)   r5   r   r   )
fieldnames)r   formatr   r   nowstrftimeopencsv
DictWriterwriteheaderr   )r   featurescsv_filenamecsv_filecolumn_namesr   s         r8   run_perf_testr     s    u%H/660L
 
lb	1X
. ^^HF
 z"7 
2	1	1s   8BB'c                  R   t        j                  d      } | j                  ddddgdd       | j                  d	dg d
dd       | j                  dddd       | j                  d       | j                  dddd       | j                  d       | j                  ddt        g ddd       | j                  ddt        dd       | j                  ddt        dd        | j                  d!dt        dd"       | j                  d#dt        d$d%       | j                  d&dt        d'd(       | j                  d)dt
        d*d*d+gd,-       | j                  d.dddd/0       | j                  d1dddd20       | j                  d3dddd40       | j                  d5dddd60       | j                  d7dt
        d8g d9d:-       | j                  d;dt
        d<d=       | j                  d>dt
        d?d@       | j                  dAdt
        d g dBdC-       | j                         }|S )DNz,Benchmark SMA2 for ONNX Runtime and PyTorch.)descriptionz--componentFr   image_decoderzDcomponent to benchmark. Choices are image_encoder and image_decoder.)requiredchoicesdefaulthelpz--dtyper   r   zData type for inference.z	--use_gpu
store_truezUse GPU for inference.)r   actionr  )r   z--use_cuda_graphzUse cuda graph in onnxruntime.)r   z--intra_op_num_threads)r   r   rJ   r   r      r   z&intra_op_num_threads for onnxruntime. )r   r   r  r  r  z--batch_sizer   z
batch size)r   r   r  r  z--heightr   zimage heightz--widthzimage widthz	--repeatsr   z8number of repeats for performance test. Default is 1000.z	--warm_upr   z)number of runs for warm up. Default is 5.z--enginer   rS   zengine for inference)r   r   r  r  r  z--multimask_outputz:Export mask_decoder or image_decoder with multimask_output)r   r  r  r  z--prefer_nhwcz;Use prefer_nhwc=1 provider option for CUDAExecutionProviderz--enable_nvtx_profilezVEnable nvtx profiling. It will add an extra run for profiling before performance test.z--enable_torch_profilezYEnable PyTorch profiling. It will add an extra run for profiling before performance test.z--model_typer1   r.   zsam2 model namez
--sam2_dirz./segment-anything-2z6The directory of segment-anything-2 git root directoryz--onnx_pathz6./sam2_onnx_models/sam2_hiera_large_image_encoder.onnxzpath of onnx modelz--torch_compile_mode)zreduce-overheadr   zmax-autotune-no-cudagraphsr   z4torch compile mode. none will disable torch compile.)argparseArgumentParseradd_argumentset_defaultsra   r`   
parse_args)parserr   s     r8   _parse_argumentsr    s   $$1_`F
 /2S   E+CVZt   %	   &
-	   u-
 #5            G   8    #   I   J   e    h   "c   &E   H!   YC   DKr:   __main__z
arguments:r   r   r   r   rg   Fr<   )/r~   r  r   r   r   r   typingr   r   r   rS   r   r   r   r   
sam2_utilsr	   r
   r   onnxruntimer   r   r   *onnxruntime.transformers.io_binding_helperr   r   rs   rw   ry   r   r   	Namespacer   r   r   r  r\   r   ri   r5   r   r   rk   is_availabler   r*   r)   rd   r:   r8   <module>r     s    
    * *  * * N N Q Q BU Upz DT $:  6 6+ |.j |.B ,0x


x(xv"#J]@ zD	Jtf
&48NNo4U.[a||zz&&(((;;%*.E.GGGG(-D% ++++,,,,4#<#<d+ r:   