
    g"              	           d dl Z d dlZd dlZd dlmZmZ d dlmZ d dlm	Z	 d dlm
Z
  e j                  e      Z G d de
j                        Z	 	 dde	d	ed
edefdZde	d	ed
edefdZy)    N)SAM2ImageEncoderrandom_sam2_input_image)SAM2PromptEncoder)SAM2Base)nnc                        e Zd Z	 ddedededdf fdZ ej                         dej                  dej                  d	ej                  d
ej                  dej                  dej                  fd       Z	 xZ
S )SAM2MaskDecoder	sam_modelmultimask_outputdynamic_multimask_via_stabilityreturnNc                     t         |           |j                  | _        |j                  | _        || _        || _        || _        y )N)	super__init__sam_mask_decodermask_decodersam_prompt_encoderprompt_encodermodelr   r   )selfr
   r   r   	__class__s       f/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/sam2/mask_decoder.pyr   zSAM2MaskDecoder.__init__   sC     	%66'::
 0/N,    image_features_0image_features_1image_embeddingsimage_pesparse_embeddingsdense_embeddingsc           	      h   | j                   j                  |||||j                  d   dkD  ||g      \  }}}	}	| j                  r |ddddddddf   }|ddddf   }||fS | j                  r#| j                   j                  ||      \  }}||fS |ddddddddf   }|ddddf   }||fS )a  
        Decode masks from image and prompt embeddings. Only support H=W=1024.

        Args:
            image_features_0 (torch.Tensor): [1, 32, H/4, W/4]. high resolution features of level 0 from image encoder.
            image_features_1 (torch.Tensor): [1, 64, H/8, W/8]. high resolution features of level 1 from image encoder.
            image_embeddings (torch.Tensor): [1, 256, H/16, W/16]. image embedding from image encoder.
            image_pe (torch.Tensor): [1, 256, H/16, W/16]. image positional encoding.
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, H/16, W/16]. embedding for input masks.

        Returns:
            low_res_masks (torch.Tensor, optional): [1, M, H/4, W/4]. low resolution masks.
            iou_predictions (torch.Tensor): [1, M]. scores for M masks.
        r      )r   r   sparse_prompt_embeddingsdense_prompt_embeddingsrepeat_imagehigh_res_featuresN)r   predict_masksshaper   r    _dynamic_multimask_via_stability)
r   r   r   r   r   r   r   low_res_masksiou_predictions_s
             r   forwardzSAM2MaskDecoder.forward   s    2 04/@/@/N/N-%6$4*003a7/1AB 0O 0
,1   )!QRA+6M-ae4O o-- 11 .2->->-_-_.*M? o-- *!QqS!Q,7M-a1f5Oo--r   )T)__name__
__module____qualname__r   boolr   torchno_gradTensorr,   __classcell__)r   s   @r   r	   r	      s    
 15	OO O *.	O
 
O U]]_/.,,/.  ,,/.  ,,	/.
 ,,/. !<</.  ,,/. /.r   r	   
sam2_modelonnx_model_pathr   r   c                    t        |       j                         }t               }t        |       j                         } ||      \  }}	}
t        j                  d|j                         t        j                  d|	j                         t        j                  d|
j                         d}d}t        j                  dd||dft        j                        }t        j                  dd	||ft        j                        }t        j                  |d	d
d
t        j                        }t        j                  d	t        j                        } |||||      \  }}}t        j                  d|j                         t        j                  d|j                         t        j                  d|j                         t        | ||      }||	|
|||f} || \  }}t        j                  d|j                         t        j                  d|j                         t        j                         5  |sJt        j                  dt        j                   j"                         t        j                  dt$               t        j&                  j)                  |||dddg dddgdddddiddiddid	       d d d        t+        d|       y # 1 sw Y   xY w)Nzimage_features_0.shape: %szimage_features_1.shape: %szimage_embeddings.shape: %s      r      lowhighsizedtyper!      r?   zsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %szlow_res_masks.shape: %sziou_predictions.shape: %signore)categoryT   r   r   r   r   r   r   r)   r*   
num_labelsznum_points+1)r   r!   )r   r   r)   r*   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesz mask decoder onnx model saved to)r   cpur   r   loggerinfor'   r1   randintfloatzerosonesr	   warningscatch_warningsfilterwarningsjitTracerWarningUserWarningonnxexportprint)r5   r6   r   r   verbosesam2_prompt_encoderimagesam2_encoderr   r   r   rF   
num_pointspoint_coordspoint_labelsinput_maskshas_input_masksr   r   r   sam2_mask_decoderinputsr)   r*   s                           r   export_mask_decoder_onnxrh   R   su    ,J7;;=#%E#J/335L;G;N8&(8
KK,.>.D.DE
KK,.>.D.DE
KK,.>.D.DE JJ==QTZQR8S[`[f[fgL==QQj*5MUZU`U`aL++j!S#U[[IKjj%++6O4GlK51' KK-/@/F/FG
KK,.>.D.DE
KK$hnn5'
4DFef 02BHN_aqrF%6%?"M?
KK)=+>+>?
KK+_-B-BC		 	 	"##Huyy7N7NO##H{C

 $ *+<=)5.%I%&$5"#\!2$%|#4	! 	 	
	 
#8 

,o>9 
#	"s   -BK		Kc           
      (   t        |       j                         }t               }t        |       j                         } ||      \  }}}	d}
d}t	        j
                  dd|
|dft        j                        }t	        j
                  dd|
|ft        j                        }t	        j                  |
dddt        j                        }t	        j                  dt        j                        } |||||      \  }}}t        | ||      }|||	|||f} || \  }}dd l
}|j                  ||j                         	      }|j                         }t        t        |            D cg c]  }||   j                    }}t"        j%                  d
|       |j'                         }t        t        |            D cg c]  }||   j                    }}t"        j%                  d|       |j)                  ||j+                         |j+                         |	j+                         |j+                         |j+                         |j+                         d      }t-        |      D ])  \  }}t"        j%                  d|||   j.                         + |\  } }!t        j0                  j3                  |t	        j4                  |       dd       t        j0                  j3                  |t	        j4                  |!      dd       t7        d|        y c c}w c c}w )Nr!      r   r:   r8   r;   r@   rA   )	providerszinput_names: %szoutput_names: %srE   zoutput %s shape: %sg{Gzt?g-C6?)atolrtolzonnx model has been verified: )r   rM   r   r   r1   rP   rQ   randrS   r	   onnxruntimeInferenceSessionget_available_providers
get_inputsrangelennamerN   rO   get_outputsrunnumpy	enumerater'   testingassert_closetensorr\   )"r5   r6   r   r   r^   r_   r`   r   r   r   rF   ra   rb   rc   rd   re   r   r   r   rf   rg   r)   r*   ro   ort_sessionmodel_inputsirJ   model_outputsrK   outputsoutput_nameort_low_res_masksort_iou_predictionss"                                     r   test_mask_decoder_onnxr      s    ,J7;;=#%E#J/335L;G;N8&(8JJ==QTZQR8S[`[f[fgL==QQj*5MUZU`U`aL**ZCEKKHKjj%++6O4GlK51' (
4DFef 02BHN_aqrF%6%?"M?..+JmJmJo.pK))+L16s<7H1IJ1IA<?''1IKJ
KK!;/++-M38]9K3LM3LaM!$))3LLM
KK"L1oo 0 6 6 8 0 6 6 8 0 6 6 8 (!2!8!8!: 0 6 6 8	

G $L1;);
8H8HI 2 .5**	MM}ell;L.MTX_cd	MM=P0QX\cgh	*?*;
<=3 K Ns   $L
8L)TF)loggingrT   r1   image_encoderr   r   r   r   sam2.modeling.sam2_baser   r   	getLoggerr-   rN   Moduler	   strr0   rh   r    r   r   <module>r      s   
    C , , 			8	$>.bii >.J -1B?B?B? B? &*	B?J9>9>9> 9> &*	9>r   