"""Gradio demo that runs SlimSAM segmentation from a box or a point prompt."""

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import SamModel, SamProcessor
from gradio_image_prompter import ImagePrompter

device = 'cpu'
model_id = "nielsr/slimsam-50-uniform"

# Loaded once at import time and shared by every request.
slim_sam_model = SamModel.from_pretrained(model_id).to(device)
slim_sam_processor = SamProcessor.from_pretrained(model_id)


def _predict_mask(inputs):
    """Run the model on prepared processor inputs and return one mask.

    Args:
        inputs: The object returned by ``slim_sam_processor(...)``, already
            moved to ``device``.

    Returns:
        A NumPy array: element ``[0][0][0]`` of the post-processed masks with
        a new leading axis added.
    """
    with torch.no_grad():
        outputs = slim_sam_model(**inputs)

    masks = slim_sam_processor.image_processor.post_process_masks(
        outputs.pred_masks.cpu(),
        inputs["original_sizes"].cpu(),
        inputs["reshaped_input_sizes"].cpu(),
    )
    # presumably indexes (image, prompt, candidate mask) — TODO confirm
    mask = masks[0][0][0].numpy()
    # NOTE(review): the leading axis is kept from the original code; confirm
    # that gr.AnnotatedImage accepts a mask with this extra dimension.
    return mask[np.newaxis, ...]


def sam_box_inference(image, x_min, y_min, x_max, y_max):
    """Segment ``image`` using a bounding-box prompt.

    Args:
        image: Image as a NumPy array (converted with ``Image.fromarray``).
        x_min, y_min, x_max, y_max: Box corner coordinates.

    Returns:
        A one-element list ``[(mask, "mask")]`` for ``gr.AnnotatedImage``.
    """
    inputs = slim_sam_processor(
        Image.fromarray(image),
        input_boxes=[[[[x_min, y_min, x_max, y_max]]]],
        return_tensors="pt",
    ).to(device)

    mask = _predict_mask(inputs)
    print(mask)
    print(mask.shape)
    return [(mask, "mask")]


def sam_point_inference(image, x, y):
    """Segment ``image`` using a single point prompt.

    Args:
        image: Image passed directly to the processor.
        x, y: Coordinates of the prompt point.

    Returns:
        A one-element list ``[(mask, "mask")]`` for ``gr.AnnotatedImage``.
    """
    inputs = slim_sam_processor(
        image,
        input_points=[[[x, y]]],
        return_tensors="pt",
    ).to(device)

    mask = _predict_mask(inputs)
    print(type(mask))
    print(mask.shape)
    return [(mask, "mask")]


def infer_point(img):
    """Handle a change of the image editor: segment around the drawn dot.

    The prompt point is the centroid of all non-zero pixels of the first
    editor layer.

    Args:
        img: Image editor value, a dict with ``"background"`` and
            ``"layers"`` entries, or ``None``.

    Returns:
        ``(image, annotations)`` for ``gr.AnnotatedImage``.

    Raises:
        gr.Error: If no image was uploaded or no point was drawn.
    """
    # gr.Error only reaches the user when it is raised, not merely created.
    if img is None or img["background"] is None:
        raise gr.Error("Please upload an image and select a point.")

    layers = img["layers"]
    if not layers:
        raise gr.Error("Please select a point on top of the image.")

    image = img["background"].convert("RGB")
    layer_arr = np.array(layers[0])
    if not np.any(layer_arr):
        raise gr.Error("Please select a point on top of the image.")

    nonzero_indices = np.nonzero(layer_arr)
    # Axis 0 indexes rows (y) and axis 1 indexes columns (x).
    center_x = int(np.mean(nonzero_indices[1]))
    center_y = int(np.mean(nonzero_indices[0]))
    print("Point inference returned.")
    return (image, sam_point_inference(image, center_x, center_y))


def infer_box(prompts):
    """Handle the submit button: segment inside the drawn box.

    Args:
        prompts: ImagePrompter value, a dict with ``"image"`` and
            ``"points"`` entries, or ``None``.

    Returns:
        ``(image, annotations)`` for ``gr.AnnotatedImage``.

    Raises:
        gr.Error: If no image was uploaded or no box was drawn.
    """
    if prompts is None or prompts["image"] is None:
        raise gr.Error("Please upload an image and draw a box before submitting")
    image = prompts["image"]

    # Check for a missing or empty list before indexing to avoid IndexError.
    all_points = prompts["points"]
    if not all_points or all_points[0] is None:
        raise gr.Error("Please draw a box before submitting.")
    points = all_points[0]
    print(points)

    # Indices 0, 1 and 3, 4 are used as the two box corners; index 2 is
    # skipped (presumably a prompt-type marker — TODO confirm).
    return (
        image,
        sam_box_inference(image, points[0], points[1], points[3], points[4]),
    )


if __name__ == '__main__':
    with gr.Blocks(title="SlimSAM") as demo:
        gr.Markdown("# SlimSAM")
        gr.Markdown("SlimSAM is the pruned-distilled version of SAM that is smaller.")
        gr.Markdown("In this demo, you can compare SlimSAM outputs in point and box prompts.")

        with gr.Tab("Box Prompt"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("To try box prompting, simply upload and image and draw a box on it.")
            with gr.Row():
                with gr.Column():
                    box_input = ImagePrompter()
                    btn = gr.Button("Submit")
                with gr.Column():
                    output_box_slimsam = gr.AnnotatedImage(label="SlimSAM Output")

            btn.click(infer_box, inputs=box_input, outputs=[output_box_slimsam])

        with gr.Tab("Point Prompt"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("To try point prompting, simply upload and image and leave a dot on it.")
            with gr.Row():
                with gr.Column():
                    point_input = gr.ImageEditor(
                        type="pil",
                    )
                with gr.Column():
                    output_slimsam = gr.AnnotatedImage(label="SlimSAM Output")

            point_input.change(infer_point, inputs=point_input, outputs=[output_slimsam])

    demo.launch(debug=True)