
    g              	           d dl Z d dlZd dlZd dlmZ d dlmZmZ d dlmZ d dl	Z	 e j                  e      Z G d dej                        Z	 	 ddeded	ed
efdZ	 ddedefdZy)    N)SAM2Base)compare_tensors_with_tolerancerandom_sam2_input_image)nnc            
            e Zd Zdeddf fdZ	 ddej                  dedeej                  ej                  ej                  f   fdZ	 xZ
S )	SAM2ImageEncoder	sam_modelreturnNc                 t    t         |           || _        |j                  | _        |j                  | _        y )N)super__init__modelimage_encoderno_mem_embed)selfr	   	__class__s     g/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/sam2/image_encoder.pyr   zSAM2ImageEncoder.__init__   s1    
&44%22    imageenable_nvtx_profilec           
         d}|rddl m}  |ddg      }||j                  d       | j                  |      }|"|j	                  d       |j                  d       | j
                  j                  j                  |d   d         |d   d<   | j
                  j                  j                  |d   d         |d   d<   |d   | j
                  j                   d }|d   | j
                  j                   d }|D cg c]   }|j                  d	   |j                  d
   f" }	}|D cg c]$  }|j                  d      j                  ddd      & }
}|
d
   | j                  z   |
d
<   t        |
ddd
   |	ddd
         D cg c]*  \  }} |j                  ddd      j                  dd
g| , c}}ddd
   }|!|j	                  d       |j!                          |d   |d   |d   fS c c}w c c}w c c}}w )a  
        Encodes images into features.

        Only supports H=W=1024. If you want to use different image sizes like 512x512,
        see https://github.com/facebookresearch/segment-anything-2/issues/138.

        Args:
            image (torch.Tensor): images of shape [B, 3, H, W], B is batch size, H and W are height and width.
            enable_nvtx_profile (bool): enable NVTX profiling.

        Returns:
            image_features_0: image features of shape [B, 32, H/4, W/4] - high resolution features of level 0
            image_features_1: image features of shape [B, 64, H/8, W/8] - high resolution features of level 1
            image_embeddings: image features of shape [B, 256, H/16, W/16] - 16 is the backbone_stride
        Nr   )
NvtxHelperr   post_processbackbone_fpn   vision_pos_enc   )nvtx_helperr   start_profiler   stop_profiler   sam_mask_decoderconv_s0conv_s1num_feature_levelsshapeflattenpermuter   zipreshapeprint_latency)r   r   r   r    r   backbone_outfeature_mapsvision_pos_embedsx
feat_sizesvision_featsfeat	feat_sizefeatss                 r   forwardzSAM2ImageEncoder.forward   s3   ( .$o~%FGK"%%o6))%0"$$_5%%n5 +/***E*E*M*Ml[iNjklNm*n^$Q'*.***E*E*M*Ml[iNjklNm*n^$Q' $N3TZZ5R5R4R4TU()9:DJJ<Y<Y;Y;[\:KL:KQqwwr{AGGBK0:K
L @LL|!		!,,Q15|L'+d.?.??R $'|DbD'9:dd;K#L
#Li *DLLAq!))!R<)<#L
 B$
 "$$^4%%'Qxq58++# M M
s   =%G-()G2/G7F)__name__
__module____qualname__r   r   torchTensorbooltupler6   __classcell__)r   s   @r   r   r      sX    3( 3t 3 %*<,||<, "<, 
u||U\\5<<7	8	<,r   r   
sam2_modelonnx_model_pathdynamic_batch_axesverbosec                    t               }t        |       j                         } ||      \  }}}t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|j
                         d }	|rddiddiddiddid}	t        j                         5  |sJt        j                  dt        j                  j                  	       t        j                  dt        	       t        j                  j                  |||d
dd
dgg d|		       d d d        t        d|       y # 1 sw Y   xY w)Nimage.shape: %simage_features_0.shape: %simage_features_1.shape: %simage_embeddings.shape: %sr   
batch_size)r   image_features_0image_features_1image_embeddingsignore)categoryT   r   )rJ   rK   rL   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axeszencoder onnx model saved to)r   r   cpuloggerinfor'   warningscatch_warningsfilterwarningsr;   jitTracerWarningUserWarningonnxexportprint)
r@   rA   rB   rC   r   sam2_encoderrJ   rK   rL   rU   s
             r   export_image_encoder_onnxrc   X   sA    $%E#J/335L;G;N8&(8
KK!5;;/
KK,.>.D.DE
KK,.>.D.DE
KK,.>.D.DEL&!"L 1!"L 1!"L 1	
 
	 	 	"##Huyy7N7NO##H{C

 $ 	U% 	 
	
	 
#  

'9! 
#	"s   A8E$$E-c                 ,   t        j                  |t        j                               }|j                         }t	        t        |            D cg c]  }||   j                   }}t        j                  d|       |j                         }t	        t        |            D cg c]  }||   j                   }}t        j                  d|       |rddgndg}	|	D ]  }
t        |
      }t        |       j                         } ||j                               \  }}}t        j                  d|j                         t        j                  d|j                         t        j                  d|j                         t        j                  d	|j                         |j                  |d
|j!                         i      }t#        |      D ])  \  }}t        j                  d|||   j                         + |\  }}}t%        d|t'        j(                  |      d      rWt%        d|t'        j(                  |      d      r5t%        d|t'        j(                  |      d      rt+        d|
 d|        t+        d|
 d|         y c c}w c c}w )N)	providerszinput_names: %szoutput_names: %sr   r   rE   rF   rG   rH   r   zoutput %s shape %srJ   )mismatch_percentage_tolerancerK   rL   z,onnx model has been verified for batch_size=z: z.onnx model verification failed for batch_size=)onnxruntimeInferenceSessionget_available_providers
get_inputsrangelennamerW   rX   get_outputsr   r   rV   cloner'   runnumpy	enumerater   r;   tensorra   )r@   rA   rB   ort_sessionmodel_inputsirS   model_outputsrT   batch_sizesrI   r   rb   rJ   rK   rL   outputsoutput_nameort_image_features_0ort_image_features_1ort_image_embeddingss                        r   test_image_encoder_onnxr~      sI   
 ..+JmJmJopK))+L16s<7H1IJ1IA<?''1IKJ
KK!;/++-M38]9K3LM3LaM!$))3LLM
KK"L1.1a&QCK!
'
3'
3779?KEKKM?Z<*,<%u{{302B2H2HI02B2H2HI02B2H2HI//,%++-0HI'5NA{KK,k71:;K;KL 6KRH24H +" 12./	 /" 12./	 /" 12./	 @BN_`aB:,bQ`PabcO " K Ns   J$J)FFr7   )loggingrY   r;   sam2.modeling.sam2_baser   
sam2_utilsr   r   r   rg   	getLoggerr8   rW   Moduler   strr=   rc   r~    r   r   <module>r      s   
    , N  			8	$C,ryy C,R  %	(:(:(: (: 	(:\ 7d7d7dr   