SkalskiP committed on
Commit b577d3a · 1 Parent(s): 725f958

SAM box inference is working

Files changed (3)
  1. .gitignore +2 -1
  2. app.py +107 -11
  3. requirements.txt +2 -2
.gitignore CHANGED
@@ -1 +1,2 @@
-venv/
+venv/
+.idea/
app.py CHANGED
@@ -1,29 +1,125 @@
-import torch
+import time
 import gradio as gr
+import numpy as np
+import supervision as sv
+from PIL import Image
+import torch
+from transformers import SamModel, SamProcessor
+from typing import Tuple
+
 
 MARKDOWN = """
 # EfficientSAM vs. SAM
 """
 
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+SAM_MODEL = SamModel.from_pretrained("facebook/sam-vit-huge").to(DEVICE)
+SAM_PROCESSOR = SamProcessor.from_pretrained("facebook/sam-vit-huge")
+MASK_ANNOTATOR = sv.MaskAnnotator(
+    color=sv.Color.red(),
+    color_lookup=sv.ColorLookup.INDEX)
 
 
-def inference(image):
+def annotate_image(image: np.ndarray, detections: sv.Detections) -> np.ndarray:
+    bgr_image = image[:, :, ::-1]
+    annotated_bgr_image = MASK_ANNOTATOR.annotate(
+        scene=bgr_image, detections=detections)
+    return annotated_bgr_image[:, :, ::-1]
+
+
+def efficient_sam_inference(
+    image: np.ndarray,
+    x_min: int,
+    y_min: int,
+    x_max: int,
+    y_max: int
+) -> np.ndarray:
+    time.sleep(0.2)
     return image
 
 
+def sam_inference(
+    image: np.ndarray,
+    x_min: int,
+    y_min: int,
+    x_max: int,
+    y_max: int
+) -> np.ndarray:
+    input_boxes = [[[x_min, y_min, x_max, y_max]]]
+    inputs = SAM_PROCESSOR(
+        Image.fromarray(image),
+        input_boxes=[input_boxes],
+        return_tensors="pt"
+    ).to(DEVICE)
+
+    with torch.no_grad():
+        outputs = SAM_MODEL(**inputs)
+
+    mask = SAM_PROCESSOR.image_processor.post_process_masks(
+        outputs.pred_masks.cpu(),
+        inputs["original_sizes"].cpu(),
+        inputs["reshaped_input_sizes"].cpu()
+    )[0][0][0].numpy()
+    mask = mask[np.newaxis, ...]
+    detections = sv.Detections(xyxy=sv.mask_to_xyxy(masks=mask), mask=mask)
+    return annotate_image(image=image, detections=detections)
+
+
+def inference(
+    image: np.ndarray,
+    x_min: int,
+    y_min: int,
+    x_max: int,
+    y_max: int
+) -> Tuple[np.ndarray, np.ndarray]:
+    return (
+        efficient_sam_inference(image, x_min, y_min, x_max, y_max),
+        sam_inference(image, x_min, y_min, x_max, y_max)
+    )
+
+
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
-    with gr.Row():
-        input_image = gr.Image()
-        output_image = gr.Image()
-    with gr.Row():
-        submit_button = gr.Button("Submit")
+    with gr.Tab(label="Box prompt"):
+        with gr.Row():
+            with gr.Column():
+                input_image = gr.Image()
+                with gr.Accordion(label="Box", open=False):
+                    with gr.Row():
+                        x_min_number = gr.Number(label="x_min")
+                        y_min_number = gr.Number(label="y_min")
+                        x_max_number = gr.Number(label="x_max")
+                        y_max_number = gr.Number(label="y_max")
+            efficient_sam_output_image = gr.Image()
+            sam_output_image = gr.Image()
+        with gr.Row():
+            submit_button = gr.Button("Submit")
+
+    gr.Examples(
+        fn=inference,
+        examples=[
+            [
+                'https://media.roboflow.com/notebooks/examples/dog.jpeg',
+                69,
+                247,
+                624,
+                930
+            ]
+        ],
+        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
+        outputs=[efficient_sam_output_image, sam_output_image],
+    )
+
+    submit_button.click(
+        efficient_sam_inference,
+        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
+        outputs=efficient_sam_output_image
+    )
 
     submit_button.click(
-        inference,
-        inputs=[input_image],
-        outputs=output_image
+        sam_inference,
+        inputs=[input_image, x_min_number, y_min_number, x_max_number, y_max_number],
+        outputs=sam_output_image
     )
 
 demo.launch(debug=False, show_error=True)
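
For reference, the box-prompt flow this commit wires up can be reproduced outside Gradio. Below is a minimal standalone sketch mirroring sam_inference: the checkpoint, box nesting, and post-processing calls are taken from app.py above, while the local file name dog.jpeg and the final print are illustrative assumptions.

import torch
from PIL import Image
from transformers import SamModel, SamProcessor

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = SamModel.from_pretrained("facebook/sam-vit-huge").to(device)
processor = SamProcessor.from_pretrained("facebook/sam-vit-huge")

# Placeholder path: a local copy of the Roboflow example image used in
# gr.Examples above; the box is its (x_min, y_min, x_max, y_max) prompt.
image = Image.open("dog.jpeg").convert("RGB")
input_boxes = [[[69, 247, 624, 930]]]

# Same nesting as app.py: one batch containing one box prompt.
inputs = processor(image, input_boxes=[input_boxes], return_tensors="pt").to(device)
with torch.no_grad():
    outputs = model(**inputs)

# post_process_masks upscales the low-resolution mask logits back to the
# original image size; [0][0][0] picks image 0, box 0, first mask proposal.
mask = processor.image_processor.post_process_masks(
    outputs.pred_masks.cpu(),
    inputs["original_sizes"].cpu(),
    inputs["reshaped_input_sizes"].cpu()
)[0][0][0].numpy()

print(mask.shape, mask.dtype)  # (H, W) boolean mask, ready for sv.Detections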
requirements.txt CHANGED
@@ -2,7 +2,7 @@
 torch
 torchvision
 
+pillow
 gradio
 transformers
-supervision
-gradio-imageslider
+supervision
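
The requirements now match the new imports in app.py: pillow backs the PIL usage and supervision provides the mask annotator, while numpy arrives transitively via supervision. A typical local run, assuming a cloned checkout and an activated virtual environment:

pip install -r requirements.txt
python app.py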