Spaces:

Abijith
/

Segmentation-Annotator-using-MobileSAM

Runtime error

App Files Files Community

Segmentation-Annotator-using-MobileSAM / main.py

Abijith

Upload 39 files

6d6f995 10 months ago

raw

history blame

No virus

5.83 kB

	import argparse
	import functools
	import os
	import pathlib

	import gradio as gr
	import numpy as np
	import torch
	from PIL import Image
	from torch.nn import functional as F

	from show import *
	from per_segment_anything import sam_model_registry, SamPredictor


	# Command-line options for the app.
	# NOTE(review): args.output_path is not read anywhere in the visible code;
	# confirm whether it is still needed.
	parser = argparse.ArgumentParser()
	parser.add_argument("-op", "--output-path", type=str, default='default')
	# parse_known_args() keeps the module importable when the process was
	# started with arguments meant for another program (a hosting launcher, a
	# test runner): parse_args() would terminate the interpreter on the first
	# unrecognized option.
	args, _unrecognized_args = parser.parse_known_args()


	class ImageMask(gr.components.Image):
	    """Image component template with fixed input settings.

	    Sets: source="upload", tool="select", interactive=True

	    Any other keyword argument is forwarded unchanged to
	    ``gr.components.Image``. Preprocessing is inherited from the parent
	    class.

	    NOTE(review): the interface built in this file uses ``gr.ImageMask``,
	    not this subclass - confirm whether this class is still needed.
	    """

	    is_template = True

	    def __init__(self, **kwargs):
	        super().__init__(source="upload", tool="select", interactive=True, **kwargs)


	def point_selection(mask_sim, topk=1):
	    """Pick the ``topk`` highest and ``topk`` lowest entries of a 2-D map.

	    Each selected entry is reported as a pair
	    ``(index along dim 1, index along dim 0)`` of ``mask_sim``.

	    Returns:
	        ``(topk_xy, topk_label, last_xy, last_label)`` where the ``*_xy``
	        items are NumPy arrays of shape ``(topk, 2)``, ``topk_label`` is an
	        array of ones and ``last_label`` an array of zeros, each of length
	        ``topk``.
	    """
	    # The size of the second dimension is the stride between consecutive
	    # dim-0 slices in the flattened view.
	    _, stride = mask_sim.shape
	    flat_scores = mask_sim.flatten(0)

	    def _select(largest, label):
	        # Flat positions of the extreme values, best match first.
	        flat_idx = flat_scores.topk(topk, largest=largest)[1]
	        dim0_idx = (flat_idx // stride).unsqueeze(0)
	        dim1_idx = flat_idx - dim0_idx * stride
	        coords = torch.cat((dim1_idx, dim0_idx), dim=0).permute(1, 0)
	        return coords.cpu().numpy(), np.array([label] * topk)

	    # Highest-scoring locations get label 1 ...
	    topk_xy, topk_label = _select(True, 1)
	    # ... and lowest-scoring locations get label 0.
	    last_xy, last_label = _select(False, 0)

	    return topk_xy, topk_label, last_xy, last_label

	@functools.lru_cache(maxsize=1)
	def _load_sam_model(sam_type='vit_t', sam_ckpt='weights/mobile_sam.pt'):
	    """Build the model registered as ``sam_type`` from ``sam_ckpt``, memoized."""
	    return sam_model_registry[sam_type](checkpoint=sam_ckpt)


	def inference_scribble(image):
	    """Segment the region of an image that the user scribbled on.

	    Args:
	        image: mapping with an ``"image"`` and a ``"mask"`` entry; both values
	            must provide ``.convert("RGB")`` (the interface in this file
	            requests PIL images).

	    Returns:
	        A list of two RGB PIL images: the input blended with the coloured
	        mask (0.6 mask + 0.4 image), and the coloured mask on its own.

	    Raises:
	        gr.Error: if the scribble mask is empty, or no scribbled location
	            remains after the mask is resized to the feature-map size.
	    """
	    # In-context image and the scribble drawn on top of it.
	    ic_image = np.array(image["image"].convert("RGB"))
	    ic_mask = np.array(image["mask"].convert("RGB"))

	    # Without any stroke there is nothing to average below, and the prompts
	    # would be computed from NaNs; fail early with a readable message.
	    if not ic_mask.any():
	        raise gr.Error("Please draw on the image before submitting.")

	    # The checkpoint is read once per process; a fresh predictor is created
	    # per call so per-image state is never shared between requests.
	    predictor = SamPredictor(_load_sam_model())

	    # Image features encoding
	    ref_mask = predictor.set_image(ic_image, ic_mask)
	    # presumably (1, C, H, W) -> (H, W, C); the later unpacking
	    # `C, h, w = test_feat.shape` relies on the squeezed layout.
	    ref_feat = predictor.features.squeeze().permute(1, 2, 0)

	    # Bring the mask to the spatial size of the feature map and keep its
	    # first channel.
	    ref_mask = F.interpolate(ref_mask, size=ref_feat.shape[0: 2], mode="bilinear")
	    ref_mask = ref_mask.squeeze()[0]

	    # Target feature extraction
	    print("======> Obtain Location Prior" )
	    target_feat = ref_feat[ref_mask > 0]
	    if target_feat.shape[0] == 0:
	        # The stroke did not survive resizing to feature resolution.
	        raise gr.Error("The drawn region is too small; please draw a larger stroke.")
	    target_embedding = target_feat.mean(0).unsqueeze(0)
	    target_direction = target_embedding / target_embedding.norm(dim=-1, keepdim=True)
	    target_embedding = target_embedding.unsqueeze(0)

	    # The image being segmented is the same image that was scribbled on.
	    test_image = ic_image

	    print("======> Testing Image")
	    # Image feature encoding
	    predictor.set_image(test_image)
	    test_feat = predictor.features.squeeze()

	    # Cosine similarity between the target direction and every location.
	    C, h, w = test_feat.shape
	    test_feat = test_feat / test_feat.norm(dim=0, keepdim=True)
	    test_feat = test_feat.reshape(C, h * w)
	    sim = target_direction @ test_feat

	    sim = sim.reshape(1, 1, h, w)
	    sim = F.interpolate(sim, scale_factor=4, mode="bilinear")
	    sim = predictor.model.postprocess_masks(
	        sim,
	        input_size=predictor.input_size,
	        original_size=predictor.original_size).squeeze()

	    # Positive-negative location prior: most similar point (label 1) and
	    # least similar point (label 0).
	    topk_xy_i, topk_label_i, last_xy_i, last_label_i = point_selection(sim, topk=1)
	    topk_xy = np.concatenate([topk_xy_i, last_xy_i], axis=0)
	    topk_label = np.concatenate([topk_label_i, last_label_i], axis=0)

	    # Obtain the target guidance for cross-attention layers
	    sim = (sim - sim.mean()) / torch.std(sim)
	    sim = F.interpolate(sim.unsqueeze(0).unsqueeze(0), size=(64, 64), mode="bilinear")
	    attn_sim = sim.sigmoid_().unsqueeze(0).flatten(3)

	    # First-step prediction
	    masks, scores, logits, _ = predictor.predict(
	        point_coords=topk_xy,
	        point_labels=topk_label,
	        multimask_output=True,
	        attn_sim=attn_sim,  # Target-guided Attention
	        target_embedding=target_embedding  # Target-semantic Prompting
	    )
	    # The first candidate is always fed to the first refinement step.
	    best_idx = 0

	    # Cascaded Post-refinement-1
	    masks, scores, logits, _ = predictor.predict(
	        point_coords=topk_xy,
	        point_labels=topk_label,
	        mask_input=logits[best_idx: best_idx + 1, :, :],
	        multimask_output=True)
	    best_idx = np.argmax(scores)

	    # Cascaded Post-refinement-2: prompt again with the bounding box of the
	    # current best mask. An empty mask has no bounding box, so the step is
	    # skipped and the current result is kept.
	    ys, xs = np.nonzero(masks[best_idx])
	    if xs.size:
	        input_box = np.array([xs.min(), ys.min(), xs.max(), ys.max()])
	        masks, scores, logits, _ = predictor.predict(
	            point_coords=topk_xy,
	            point_labels=topk_label,
	            box=input_box[None, :],
	            mask_input=logits[best_idx: best_idx + 1, :, :],
	            multimask_output=True)
	        best_idx = np.argmax(scores)

	    # Paint the selected mask in a single colour on a black canvas.
	    final_mask = masks[best_idx]
	    mask_colors = np.zeros((final_mask.shape[0], final_mask.shape[1], 3), dtype=np.uint8)
	    mask_colors[final_mask, :] = np.array([[128, 0, 0]])

	    return [Image.fromarray((mask_colors * 0.6 + test_image * 0.4).astype('uint8'), 'RGB'),
	            Image.fromarray((mask_colors ).astype('uint8'), 'RGB')]

	# Gradio UI: one sketchable image input, two image outputs (the overlay and
	# the mask alone), both produced by inference_scribble.
	main_scribble = gr.Interface(
	    fn=inference_scribble,
	    inputs=gr.ImageMask(label="[Stroke] Draw on Image", type='pil'),
	    # gr.Image replaces the deprecated gr.outputs.Image wrapper; the keyword
	    # arguments are the same.
	    outputs=[
	        gr.Image(type="pil", label="Mask with Image"),
	        gr.Image(type="pil", label="Mask"),
	    ],
	    allow_flagging="never",
	    title="SAM based Segment Annotator.",
	    description='Sketch the portion where you want to create Mask.',
	    examples=[
	        "./cardamage_example/0006.JPEG",
	        "./cardamage_example/0008.JPEG",
	        "./cardamage_example/0206.JPEG",
	    ],
	)
	# Starts the web server as soon as this module is executed.
	main_scribble.launch(share=True)