SkalskiP committed
Commit 54c9770
1 Parent(s): f9e8ad9

Add number extraction feature and update instructions


Added an 'extract_numbers_in_brackets' function in utils.py that uses a regular expression to extract numbers enclosed in square brackets from a given string. Also rewrote the META_PROMPT in gpt4v.py so the model lists bracketed marks explicitly, and switched app.py's output to a gr.AnnotatedImage with the detections held in session state.

Files changed (3)
  1. app.py +16 -7
  2. gpt4v.py +4 -1
  3. utils.py +18 -0
app.py CHANGED
@@ -1,5 +1,5 @@
 import os
-from typing import List, Dict
+from typing import List, Dict, Tuple, Any
 
 import cv2
 import gradio as gr
@@ -16,6 +16,7 @@ HOME = os.getenv("HOME")
 DEVICE = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
 
 SAM_CHECKPOINT = os.path.join(HOME, "app/weights/sam_vit_h_4b8939.pth")
+# SAM_CHECKPOINT = "weights/sam_vit_h_4b8939.pth"
 SAM_MODEL_TYPE = "vit_h"
 
 MARKDOWN = """
@@ -44,7 +45,7 @@ def inference(
     image_and_mask: Dict[str, np.ndarray],
     annotation_mode: List[str],
     mask_alpha: float
-) -> np.ndarray:
+) -> Tuple[Tuple[np.ndarray, List[Any]], sv.Detections]:
     image = image_and_mask['image']
     mask = cv2.cvtColor(image_and_mask['mask'], cv2.COLOR_RGB2GRAY)
     is_interactive = not np.all(mask == 0)
@@ -68,7 +69,7 @@ def inference(
         with_mask="Mask" in annotation_mode,
         with_polygon="Polygon" in annotation_mode,
         with_label="Mark" in annotation_mode)
-    return cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)
+    return (cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB), []), detections
 
 
 def prompt(message, history, image: np.ndarray, api_key: str) -> str:
@@ -83,6 +84,10 @@ def prompt(message, history, image: np.ndarray, api_key: str) -> str:
     )
 
 
+def on_image_input_clear():
+    return None, None
+
+
 image_input = gr.Image(
     label="Input",
     type="numpy",
@@ -100,9 +105,8 @@ slider_mask_alpha = gr.Slider(
     maximum=1,
     value=0.05,
     label="Mask Alpha")
-image_output = gr.Image(
-    label="SoM Visual Prompt",
-    type="numpy")
+image_output = gr.AnnotatedImage(
+    label="SoM Visual Prompt")
 openai_api_key = gr.Textbox(
     show_label=False,
     placeholder="Before you start chatting, set your OpenAI API key here",
@@ -115,6 +119,7 @@ run_button = gr.Button("Run")
 
 with gr.Blocks() as demo:
     gr.Markdown(MARKDOWN)
+    detections_state = gr.State()
     with gr.Row():
         with gr.Column():
             image_input.render()
@@ -139,6 +144,10 @@ with gr.Blocks() as demo:
     run_button.click(
         fn=inference,
         inputs=[image_input, checkbox_annotation_mode, slider_mask_alpha],
-        outputs=image_output)
+        outputs=[image_output, detections_state])
+    image_input.clear(
+        fn=on_image_input_clear,
+        outputs=[image_output, detections_state]
+    )
 
 demo.queue().launch(debug=False, show_error=True)
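
With gr.AnnotatedImage as the output, inference now returns an (image, annotations) pair whose annotation list is left empty, while the raw sv.Detections are kept in detections_state for later use. A minimal sketch of how those stored detections could be turned into AnnotatedImage sections, assuming masks are present and using the hypothetical helper name label_detections (not part of this commit):

from typing import List, Tuple

import numpy as np
import supervision as sv


def label_detections(detections: sv.Detections) -> List[Tuple[np.ndarray, str]]:
    # gr.AnnotatedImage accepts (mask-or-bbox, label) pairs; here each
    # boolean mask is paired with a 1-based mark label matching the
    # numbering painted onto the visual prompt.
    return [(mask, f"[{i + 1}]") for i, mask in enumerate(detections.mask)]
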
gpt4v.py CHANGED
@@ -6,7 +6,10 @@ import numpy as np
 
 
 META_PROMPT = '''
-- For any marks mentioned in your answer, please highlight them with [].
+For any labels or markings on an image that you reference in your response, please
+enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
+example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
+numbers or letters and typically correspond to specific segments or parts of the image.
 '''
 API_URL = "https://api.openai.com/v1/chat/completions"
 
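The no-ranges rule matters because downstream parsing is regex-based and only explicit '[N]' tokens match. An illustrative snippet (not part of the commit) using the same pattern that utils.py introduces below:

import re

pattern = r'\[(\d+)\]'
print(re.findall(pattern, "Segments [1], [2], [3], [4]"))  # ['1', '2', '3', '4']
print(re.findall(pattern, "Segments 1 - 4"))               # []
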
utils.py CHANGED
@@ -1,3 +1,6 @@
+import re
+from typing import List
+
 import cv2
 
 import numpy as np
@@ -205,3 +208,18 @@ def postprocess_masks(
         xyxy=sv.mask_to_xyxy(masks),
         mask=masks
     )
+
+
+def extract_numbers_in_brackets(text: str) -> List[int]:
+    """
+    Extracts all numbers enclosed in square brackets from a given string.
+
+    Args:
+        text (str): The string to be searched.
+
+    Returns:
+        List[int]: A list of integers found within square brackets.
+    """
+    pattern = r'\[(\d+)\]'
+    numbers = [int(num) for num in re.findall(pattern, text)]
+    return numbers
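
For example, a model reply that follows the META_PROMPT convention parses straight to the mark indices (illustrative usage, not part of the commit):

text = "The image shows a person [1] holding a cup [12]."
print(extract_numbers_in_brackets(text))  # [1, 12]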