SkalskiP committed on
Commit 7e50af9
1 Parent(s): 54c9770

Updated the 'sam_utils.py' and 'app.py' modules to implement automated mask generation, result highlighting, and mark generation.

Files changed (3)
  1. Dockerfile +1 -1
  2. app.py +69 -24
  3. sam_utils.py +10 -1
Dockerfile CHANGED
@@ -31,7 +31,7 @@ WORKDIR $HOME/app
 RUN pip install torch==2.0.1+cu117 torchvision==0.15.2+cu117 -f https://download.pytorch.org/whl/torch_stable.html
 
 # Install dependencies
-RUN pip install --no-cache-dir gradio==3.50.2 opencv-python supervision==0.17.0rc3 \
+RUN pip install --no-cache-dir gradio==3.50.2 opencv-python supervision==0.17.0rc4 \
     pillow requests
 
 # Install SAM and Detectron2
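Note: the only Dockerfile change is bumping the supervision pin from 0.17.0rc3 to 0.17.0rc4. A quick sanity check inside the built image could confirm the pinned build exposes the APIs this commit starts using; a minimal sketch (the attributes checked are inferred from the calls in app.py and sam_utils.py):

import supervision as sv

# confirm the pinned release and the APIs used elsewhere in this commit
print(sv.__version__)                                # expected: 0.17.0rc4
assert hasattr(sv.Detections, "from_sam")            # used by sam_utils.sam_inference
assert hasattr(sv.ColorPalette.default(), "by_idx")  # used for the AnnotatedImage color_map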
app.py CHANGED
@@ -1,16 +1,16 @@
 import os
-from typing import List, Dict, Tuple, Any
+from typing import List, Dict, Tuple, Any, Optional
 
 import cv2
 import gradio as gr
 import numpy as np
 import supervision as sv
 import torch
-from segment_anything import sam_model_registry, SamAutomaticMaskGenerator
+from segment_anything import sam_model_registry
 
 from gpt4v import prompt_image
-from utils import postprocess_masks, Visualizer
-from sam_utils import sam_interactive_inference
+from utils import postprocess_masks, Visualizer, extract_numbers_in_brackets
+from sam_utils import sam_interactive_inference, sam_inference
 
 HOME = os.getenv("HOME")
 DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
@@ -19,6 +19,9 @@ SAM_CHECKPOINT = os.path.join(HOME, "app/weights/sam_vit_h_4b8939.pth")
 # SAM_CHECKPOINT = "weights/sam_vit_h_4b8939.pth"
 SAM_MODEL_TYPE = "vit_h"
 
+ANNOTATED_IMAGE_KEY = "annotated_image"
+DETECTIONS_KEY = "detections"
+
 MARKDOWN = """
 [![arXiv](https://img.shields.io/badge/arXiv-1703.06870v3-b31b1b.svg)](https://arxiv.org/pdf/2310.11441.pdf)
 
@@ -34,7 +37,6 @@ MARKDOWN = """
 
 - [ ] Support for alphabetic labels
 - [ ] Support for Semantic-SAM (multi-level)
-- [ ] Support for result highlighting
 - [ ] Support for mask filtering based on granularity
 """
 
@@ -45,7 +47,7 @@ def inference(
     image_and_mask: Dict[str, np.ndarray],
     annotation_mode: List[str],
     mask_alpha: float
-) -> Tuple[Tuple[np.ndarray, List[Any]], sv.Detections]:
+) -> Tuple[Tuple[np.ndarray, List[Tuple[np.ndarray, str]]], Dict[str, Any]]:
     image = image_and_mask['image']
     mask = cv2.cvtColor(image_and_mask['mask'], cv2.COLOR_RGB2GRAY)
     is_interactive = not np.all(mask == 0)
@@ -56,9 +58,10 @@ def inference(
             mask=mask,
             model=SAM)
     else:
-        mask_generator = SamAutomaticMaskGenerator(SAM)
-        result = mask_generator.generate(image=image)
-        detections = sv.Detections.from_sam(result)
+        detections = sam_inference(
+            image=image,
+            model=SAM
+        )
     detections = postprocess_masks(
         detections=detections)
     bgr_image = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
@@ -69,23 +72,54 @@
         with_mask="Mask" in annotation_mode,
         with_polygon="Polygon" in annotation_mode,
         with_label="Mark" in annotation_mode)
-    return (cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB), []), detections
-
-
-def prompt(message, history, image: np.ndarray, api_key: str) -> str:
+    annotated_image = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
+    state = {
+        ANNOTATED_IMAGE_KEY: annotated_image,
+        DETECTIONS_KEY: detections
+    }
+    return (annotated_image, []), state
+
+
+def prompt(
+    message: str,
+    history: List[List[str]],
+    state: Dict[str, Any],
+    api_key: Optional[str]
+) -> str:
     if api_key == "":
         return "⚠️ Please set your OpenAI API key first"
-    if image is None:
+    if state is None or ANNOTATED_IMAGE_KEY not in state:
         return "⚠️ Please generate SoM visual prompt first"
     return prompt_image(
         api_key=api_key,
-        image=cv2.cvtColor(image, cv2.COLOR_BGR2RGB),
+        image=cv2.cvtColor(state[ANNOTATED_IMAGE_KEY], cv2.COLOR_BGR2RGB),
         prompt=message
     )
 
 
 def on_image_input_clear():
-    return None, None
+    return None, {}
+
+
+def highlight(
+    state: Dict[str, Any],
+    history: List[List[str]]
+) -> Optional[Tuple[np.ndarray, List[Tuple[np.ndarray, str]]]]:
+    if DETECTIONS_KEY not in state or ANNOTATED_IMAGE_KEY not in state:
+        return None
+
+    detections: sv.Detections = state[DETECTIONS_KEY]
+    annotated_image: np.ndarray = state[ANNOTATED_IMAGE_KEY]
+
+    response = history[-1][-1]
+    detections_ids = extract_numbers_in_brackets(text=response)
+    highlighted_detections = [
+        (detections.mask[detection_id], str(detection_id))
+        for detection_id
+        in detections_ids
+    ]
+
+    return annotated_image, highlighted_detections
 
 
 image_input = gr.Image(
@@ -106,7 +140,12 @@ slider_mask_alpha = gr.Slider(
     value=0.05,
     label="Mask Alpha")
 image_output = gr.AnnotatedImage(
-    label="SoM Visual Prompt")
+    label="SoM Visual Prompt",
+    color_map={
+        str(i): sv.ColorPalette.default().by_idx(i).as_hex()
+        for i in range(64)
+    }
+)
 openai_api_key = gr.Textbox(
     show_label=False,
     placeholder="Before you start chatting, set your OpenAI API key here",
@@ -115,11 +154,12 @@ openai_api_key = gr.Textbox(
 chatbot = gr.Chatbot(
     label="GPT-4V + SoM",
     height=256)
-run_button = gr.Button("Run")
+generate_button = gr.Button("Generate Marks")
+highlight_button = gr.Button("Highlight Marks")
 
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
-    detections_state = gr.State()
+    inference_state = gr.State({})
     with gr.Row():
         with gr.Column():
             image_input.render()
@@ -132,22 +172,27 @@ with gr.Blocks() as demo:
             slider_mask_alpha.render()
         with gr.Column():
            image_output.render()
-            run_button.render()
+            generate_button.render()
+            highlight_button.render()
     with gr.Row():
         openai_api_key.render()
     with gr.Row():
         gr.ChatInterface(
             chatbot=chatbot,
             fn=prompt,
-            additional_inputs=[image_output, openai_api_key])
+            additional_inputs=[inference_state, openai_api_key])
 
-    run_button.click(
+    generate_button.click(
         fn=inference,
         inputs=[image_input, checkbox_annotation_mode, slider_mask_alpha],
-        outputs=[image_output, detections_state])
+        outputs=[image_output, inference_state])
     image_input.clear(
         fn=on_image_input_clear,
-        outputs=[image_output, detections_state]
+        outputs=[image_output, inference_state]
     )
+    highlight_button.click(
+        fn=highlight,
+        inputs=[inference_state, chatbot],
+        outputs=[image_output])
 
 demo.queue().launch(debug=False, show_error=True)
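Note: the highlight flow depends on utils.extract_numbers_in_brackets, which is imported above but defined outside this diff. Judging from its use on the last chatbot response, it presumably pulls mark IDs such as [3] out of the GPT-4V answer so highlight can hand the matching masks to gr.AnnotatedImage. A hypothetical sketch of such a helper, assuming bracketed integers (the real utils.py implementation may differ):

import re
from typing import List


def extract_numbers_in_brackets(text: str) -> List[int]:
    # hypothetical sketch: collect every integer wrapped in square brackets,
    # e.g. "marks [1] and [4]" -> [1, 4], deduplicated in first-seen order
    matches = re.findall(r"\[(\d+)\]", text)
    return list(dict.fromkeys(int(match) for match in matches))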
sam_utils.py CHANGED
@@ -2,7 +2,16 @@ import numpy as np
 import supervision as sv
 
 from segment_anything.modeling.sam import Sam
-from segment_anything import SamPredictor
+from segment_anything import SamPredictor, SamAutomaticMaskGenerator
+
+
+def sam_inference(
+    image: np.ndarray,
+    model: Sam
+) -> sv.Detections:
+    mask_generator = SamAutomaticMaskGenerator(model)
+    result = mask_generator.generate(image=image)
+    return sv.Detections.from_sam(result)
 
 
 def sam_interactive_inference(
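The new sam_inference helper wraps SamAutomaticMaskGenerator so app.py can fall back to whole-image segmentation whenever the user draws no interactive mask. A usage sketch under assumed paths ("example.jpg" is a placeholder; the checkpoint path mirrors the commented-out SAM_CHECKPOINT in app.py):

import cv2
import torch
from segment_anything import sam_model_registry
from sam_utils import sam_inference

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
sam = sam_model_registry["vit_h"](checkpoint="weights/sam_vit_h_4b8939.pth").to(device)

# SamAutomaticMaskGenerator expects an RGB uint8 image
image = cv2.cvtColor(cv2.imread("example.jpg"), cv2.COLOR_BGR2RGB)
detections = sam_inference(image=image, model=sam)
print(len(detections))  # number of masks SAM proposed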