
    g                          d dl Z d dlZd dlmZ d dlmZ d dlmZ  e j                  e      Z	 G d dej                        Zdedefd	Zdedefd
Zy)    N)SAM2Base)compare_tensors_with_tolerance)nnc            	       T    e Zd Zdef fdZ ej                         dej                  dej                  dej                  dej                  fd       Zdej                  dej                  dej                  fd	Z	dej                  dej                  dej                  fd
Z
 xZS )SAM2PromptEncoder	sam_modelc                 R    t         |           |j                  | _        || _        y )N)super__init__sam_prompt_encoderprompt_encodermodel)selfr   	__class__s     h/var/www/openai/venv/lib/python3.12/site-packages/onnxruntime/transformers/models/sam2/prompt_encoder.pyr   zSAM2PromptEncoder.__init__   s#    '::
    point_coordspoint_labelsinput_maskshas_input_masksc                     | j                  ||      }| j                  ||      }| j                  j                         }|||fS )aj  Encode prompts.

           Args:
            point_coords (torch.Tensor): [L, P, 2] shape and float32 dtype and contains the absolute pixel
                                         coordinate in (x, y) format of the P input points in image of size 1024x1024.
            point_labels (torch.Tensor): shape [L, P] and int32 dtype, where 1 means
                                         positive (foreground), 0 means negative (background), -1 means padding,
                                         2 (box left upper corner), 3 (box right bottom corner).
            input_masks (torch.Tensor): [L, 1, H/4, W/4]. Low resolution mask input to the model.
                                        Typically coming from a previous iteration.
            has_input_masks (torch.Tensor): [L]. 1.0 if input_masks is used, 0.0 otherwise.
        Returns:
            sparse_embeddings (torch.Tensor): [L, P+1, 256], embedding for points and boxes.
            dense_embeddings (torch.Tensor):  [L, 256, 64, 64]. embedding for input masks.
            image_pe (torch.Tensor, optional): [1, 256, 64, 64]. image positional encoding.
        )_embed_points_embed_masksr   get_dense_pe)r   r   r   r   r   sparse_embeddingsdense_embeddingsimage_pes           r   forwardzSAM2PromptEncoder.forward   sL    0 !..|\J,,[/J&&335 "2H<<r   returnc                 x   |dz   }t        j                  |j                  d   ddf|j                        }t        j                  |j                  d   df|j                         }t        j
                  ||gd      }t        j
                  ||gd      }|d d d d df   | j                  j                  z  |d d d d df<   |d d d d df   | j                  j                  z  |d d d d df<   | j                  j                  j                  |      }|j                  d      j                  |      }||dk7  z  }|| j                  j                  j                  |dk(  z  z   }t        | j                  j                         D ].  }|| j                  j"                  |   j                  ||k(  z  z   }0 |S )Ng      ?r         )device)dim)torchzerosshaper#   onescatr   
image_sizer   pe_layer_pe_encoding	unsqueeze	expand_asnot_a_point_embedweightrangenum_point_embeddingspoint_embeddings)r   r   r   padding_pointpadding_labelpoint_embeddingis          r   r   zSAM2PromptEncoder._embed_points3   s   #c)\%7%7%:Aq$A,J]J]^\%7%7%:A$>|GZGZ[[yy,!>AFyy,!>AF !-Q1W 5

8M8M MQ1W ,Q1W 5

8M8M MQ1W--66CCLQ#--b1;;OL)\R-?@)D,?,?,Q,Q,X,X\hln\n,oot**??@A-0C0C0T0TUV0W0^0^bnrsbs0ttO A r   c                 <   | j                   j                  |      }| j                   j                  j                  j	                  dddd      }t
        j                  d|j                         ||z  d|z
  |z  z   }t
        j                  d|j                         |S )Nr!   r%   zno_mask_embedding.shape: %sg      ?zmask_embedding.shape: %s)r   mask_downscalingno_mask_embedr1   reshapeloggerinfor(   )r   r   r   mask_embeddingno_mask_embeddings        r   r   zSAM2PromptEncoder._embed_masksJ   s    ,,==kJ //==DDLLQPRTUWXY13D3J3JK(>9S?=RVg<gg.0D0DEr   )__name__
__module____qualname__r   r   r&   no_gradTensorr   r   r   __classcell__)r   s   @r   r   r      s    ( 
 U]]_=ll= ll= \\	=
 = =:%,, ell W\WcWc . u|| X]XdXd r   r   
sam2_modelonnx_model_pathc                    t        |       j                         }d}d}t        j                  dd||dft        j                        }t        j                  dd||ft        j
                        }t        j                  |dddt        j                        }t        j                  dt        j                        } |||||      \  }	}
}t        j                  d	|j                         t        j                  d
|j                         t        j                  d|j                         t        j                  d|j                         t        j                  d|	j                         t        j                  d|
j                         t        j                  d|j                         t        j                  j                  |||||f|dddg dg dddddddddidddddid	       t        d|       y )Nr"      r      lowhighsizedtyper!      rP   zpoint_coords.shape: %szpoint_labels.shape: %szinput_masks.shape: %szhas_input_masks.shape: %szsparse_embeddings.shape: %szdense_embeddings.shape: %szimage_pe.shape: %sT   r   r   r   r   )r   r   r   
num_labels
num_points)r   r!   znum_points+1)r   r   r   r   r   )export_paramsopset_versiondo_constant_foldinginput_namesoutput_namesdynamic_axesz#prompt encoder onnx model saved to )r   cpur&   randintfloatint32r'   r)   r=   r>   r(   onnxexportprint)rG   rH   sam2_prompt_encoderrU   rV   r   r   r   r   r   r   r   s               r   export_prompt_encoder_onnxre   S   s    ,J7;;=JJ==QTZQR8S[`[f[fgL==QQj*5MUZU`U`aL++j!S#U[[IKjj%++6O4GlK51' KK(,*<*<=
KK(,*<*<=
KK'):):;
KK+_-B-BC
KK-/@/F/FG
KK,.>.D.DE
KK$hnn5	JJ	|[/B VJ ,> ,>|,%1n!E!"L 1
  $ 

/Ar   c                 h   t        |       j                         }d}d}t        j                  dd||dft        j                        }t        j                  dd||ft        j
                        }t        j                  |dddt        j                        }t        j                  dt        j                        } |||||      \  }	}
}dd l}|j                  ||j                         	      }|j                         }t        t        |            D cg c]  }||   j                   }}t        j!                  d
|       |j#                         }t        t        |            D cg c]  }||   j                   }}t        j!                  d|       |j%                  ||j'                         |j'                         |j'                         |j'                         d      }t)        |      D ])  \  }}t        j!                  d|||   j*                         + |\  }}}t-        d|	t        j.                  |      d      rSt-        d|
t        j.                  |      d      r1t-        d|t        j.                  |      d      rt1        d|        y t1        d|        y c c}w c c}w )Nr!      r   rK   r"   rL   rQ   rR   )	providerszinput_names: %szoutput_names: %srT   zoutput %s shape: %sr   g?)mismatch_percentage_tolerancer   r   zonnx model has been verified: z onnx model verification failed: )r   r]   r&   r^   r_   r`   randr)   onnxruntimeInferenceSessionget_available_providers
get_inputsr2   lennamer=   r>   get_outputsrunnumpy	enumerater(   r   tensorrc   )rG   rH   rd   rU   rV   r   r   r   r   r   r   r   rk   ort_sessionmodel_inputsr8   rZ   model_outputsr[   outputsoutput_nameort_sparse_embeddingsort_dense_embeddingsort_image_pes                           r   test_prompt_encoder_onnxr~      st    ,J7;;=JJ==QTZQR8S[`[f[fgL==QQj*5MUZU`U`aL**ZCEKKHKjj%++6O4GlK51' ..+JmJmJo.pK))+L16s<7H1IJ1IA<?''1IKJ
KK!;/++-M38]9K3LM3LaM!$))3LLM
KK"L1oo(..0(..0&,,..446		
G $L1;);
8H8HI 2 AH=/&LL./*-		
 + 0%,,?S2Ttw
 +%,,|"<\_
 	..?@A00ABCK K Ns   J*,J/)loggingr&   sam2.modeling.sam2_baser   
sam2_utilsr   r   	getLoggerrA   r=   Moduler   strre   r~    r   r   <module>r      sl   
   , 5 			8	$A		 AH,B,B,B^;D;D;Dr   