
    g1*              	           d dl Z d dlZd dlZd dlmc mZ d dlmZm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZ d dlmZ  e j$                  e      Z G d d	ej*                        Z	 	 dd
edededefdZ	 dd
edefdZy)    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2MaskDecoder)SAM2PromptEncoder)SAM2Base)compare_tensors_with_tolerance)nnc                   8    e Zd Z	 	 	 ddedededededdf fdZ ej                         	 dd	ej                  d
ej                  dej                  dej                  dej                  dej                  dej                  dej                  defd       Z
 xZS )SAM2ImageDecoder	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturn_logitsmask_thresholdreturnNc                     t         |           t        |      | _        t	        |||      | _        || _        || _        y )N)super__init__r   prompt_encoderr   mask_decoderr   r   )selfr   r   r   r   r   	__class__s         g/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/sam2/image_decoder.pyr   zSAM2ImageDecoder.__init__   s@     	/	:+I7GIhi*,    image_features_0image_features_1image_embeddingspoint_coordspoint_labelsinput_maskshas_input_masksoriginal_image_sizeenable_nvtx_profilec
                 D   d}
|	rddl m}  |g d      }
|
|
j                  dd       | j                  ||||      \  }}}|
$|
j	                  d       |
j                  dd	       | j                  ||||||      \  }}|
$|
j	                  d       |
j                  d
d       t        j                  ||d   |d   fdd      }t        j                  |dd      }| j                  s|| j                  kD  }|
!|
j	                  d
       |
j                          |||fS )a  
        Decode masks from image features and prompts. Batched images are not supported. H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
            original_image_size(torch.Tensor): [2]. original image size H_o, W_o.
            enable_nvtx_profile (bool): enable NVTX profiling.

        Returns:
            masks (torch.Tensor): [1, M, H_o, W_o] where M=3 or 1. Masks of original image size.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
        Nr   )
NvtxHelper)r   r   post_processr   blue)colorr   redr&   green   bilinearF)modealign_cornersg      @g      @@)nvtx_helperr%   start_profiler   stop_profiler   Finterpolatetorchclampr   r   print_latency)r   r   r   r   r   r   r    r!   r"   r#   r/   r%   sparse_embeddingsdense_embeddingsimage_pelow_res_masksiou_predictionsmaskss                     r   forwardzSAM2ImageDecoder.forward#   sV   H .$%WXK"%%&6f%E8<8K8K,_9
5+X "$$%56%%nE%B)-):):.0@(L]_o*
& "$$^4%%nG%D  #%8%;<	
 M5$?!!D///E"$$^4%%'o}44r   )TFg        F)__name__
__module____qualname__r   boolfloatr   r4   no_gradTensorr=   __classcell__)r   s   @r   r   r      s    
 15# #-- - *.	-
 - - 
- U]]_ %*L5,,L5  ,,L5  ,,	L5
 llL5 llL5 \\L5 L5 #\\L5 "L5 L5r   r   
sam2_modelonnx_model_pathr   verbosec                    d}t        |      }t        |       j                         } ||      \  }}}	t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|	j
                         t        | |d      j                         }
d}d}t        j                  d	d
||dft        j                        }t        j                  d	d||ft        j                        }t        j                  |dddt        j                        }t        j                  dt        j                        }t        j                  ddgt        j                        }|||	|||||f}t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|j
                         |ri |
| \  }}}t        j	                  d|j
                         t        j	                  d|j
                         t        j	                  d|j
                         g d}g d}ddddddd	did	diddddd	did	did }t        j                         5  |sJt        j                   d!t        j"                  j$                  "       t        j                   d!t&        "       t        j(                  j+                  |
||dd#d|||$	       d d d        t        j	                  d%|       y # 1 sw Y    xY w)&Nr+   zimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %sTr   r         r      lowhighsizedtype   rS   i  i  zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szoriginal_image_size.shape: %szmasks.shape: %sziou_predictions.shape: %szlow_res_masks.shape: %s)r   r   r   r   r   r    r!   r"   )r<   r;   r:   
num_labels
num_points)r   r+   original_image_heightoriginal_image_width)r   rL   rM   )r   r   r    r!   r<   r:   r;   ignore)category   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axeszdecoder onnx model saved to %s)r   r   cpuloggerinfoshaper   r4   randintrC   int32zerosonestensorwarningscatch_warningsfilterwarningsjitTracerWarningUserWarningonnxexport)rG   rH   r   rI   
batch_sizeimagesam2_encoderr   r   r   sam2_decoderrV   rW   r   r   r    r!   r"   example_inputsr<   r;   r:   r`   ra   rb   s                            r   export_decoder_onnxry   s   s    J#J/E#J/335L;G;N8&(8
KK,.>.D.DE
KK,.>.D.DE
KK,.>.D.DE#)(, 
ce	  JJ==QTZQR8S[`[f[fgL==QQj*5MUZU`U`aL++j!S#U[[IKjj%++6O,,d|5;;G 		N KK(,*<*<=
KK(,*<*<=
KK'):):;
KK+_-B-BC
KK/1D1J1JK0<n0M-%u{{3/1F1FG-}/B/BC	K AL )\:(\:<(|,!&=BXY\*|,L 
	 	 	"##Huyy7N7NO##H{C

 $#%% 	 
	
 
#" KK0/B# 
#	"s   A5MM$c                 `   d}t        |      }t        |       j                         } ||      \  }}}t        | |d      j                         }	d}
d}t	        j
                  dd|
|dft        j                        }t	        j
                  dd|
|ft        j                        }t	        j                  |
dd	d	t        j                  
      }t	        j                  dt        j                  
      }t	        j                  ddgt        j                  
      }||||||||f} |	| \  }}}dd l
}|j                  ||j                               }|j                         }t        t        |            D cg c]  }||   j                    }}t"        j%                  d|       |j'                         }t        t        |            D cg c]  }||   j                    }}t"        j%                  d|       t        t        |            D ci c]#  }||   j                   ||   j)                         % }}|j+                  ||      }t-        |      D ]+  \  }}t"        j%                  | d||   j.                         - |\  }} }!t1        d|j                         t	        j                  |      j                               rMt1        d|t	        j                  |             r-t1        d|t	        j                  |!            rt3        d|       y t3        d|       y c c}w c c}w c c}w )Nr+   TrK      r   rN   rL   rO   rT   rU   i  )	providerszinput_names: %szoutput_names: %sz
.shape: %sr<   r;   r:   zonnx model has been verified:zonnx model verification failed:)r   r   rc   r   r4   rg   rC   rh   ri   rk   onnxruntimeInferenceSessionget_available_providers
get_inputsrangelennamerd   re   get_outputsnumpyrun	enumeraterf   r   print)"rG   rH   r   rt   ru   rv   r   r   r   sam2_image_decoderrV   rW   r   r   r    r!   r"   rx   r<   r;   r:   r}   ort_sessionmodel_inputsir`   model_outputsra   inputsoutputsoutput_name	ort_masksort_iou_predictionsort_low_res_maskss"                                     r   test_decoder_onnxr      s    J#J/E#J/335L;G;N8&(8))(, 
ce	  JJ==QTZQR8S[`[f[fgL==QQj*5MUZU`U`aL++j!S#U[[IKkk!5;;7O,,d|5;;G 		N -?,O)E?M..+JmJmJo.pK))+L16s<7H1IJ1IA<?''1IKJ
KK!;/++-M38]9K3LM3LaM!$))3LLM
KK"L1GLSQ]M^G_`G_!l1o""N1$5$;$;$==G_F`oolF3G#L1;{m:.
0@0@A 2 9@5I"$5&wu||I?V?\?\?^_*+<ou||\oOpq*?M5<<XiKjk-?/A) K N`s   8L!L&(L+)FFr>   )loggingrl   r4   torch.nn.functionalr	   
functionalr2   image_encoderr   r   r   r   r   r   sam2.modeling.sam2_baser   
sam2_utilsr   	getLoggerr?   rd   Moduler   strrB   ry   r    r   r   <module>r      s   
      C ( , , 5 			8	$\5ryy \5D #	\C\C\C \C 	\CD ?B?B?Br   