File size: 1,753 Bytes
0c52132
03b9405
f6e3ce8
03b9405
 
 
f6e3ce8
0c52132
03b9405
f6e3ce8
0c52132
 
03b9405
f6e3ce8
0c52132
03b9405
f6e3ce8
03b9405
 
 
 
 
 
 
 
 
f6e3ce8
03b9405
 
f6e3ce8
 
0c52132
03b9405
f6e3ce8
 
0c52132
 
 
 
 
 
 
 
 
 
 
03b9405
f6e3ce8
03b9405
 
f6e3ce8
 
03b9405
0c52132
 
f6e3ce8
03b9405
 
f6e3ce8
0c52132
 
 
 
f6e3ce8
03b9405
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
import os
import torch

import gradio as gr
import numpy as np
import supervision as sv

from typing import List
from segment_anything import sam_model_registry, SamAutomaticMaskGenerator


# Base directory under which the model weights are looked up. $HOME is used
# verbatim when it is set; otherwise fall back to os.path.expanduser("~") so
# HOME is always a string. A bare os.getenv("HOME") yields None when the
# variable is missing, which made the os.path.join below raise TypeError.
HOME = os.getenv("HOME", os.path.expanduser("~"))
# Use the first CUDA device when torch reports CUDA as available, else the CPU.
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Location of the SAM checkpoint file and the registry key used to build it.
SAM_CHECKPOINT = os.path.join(HOME, "weights/sam_vit_h_4b8939.pth")
SAM_MODEL_TYPE = "vit_h"

# HTML heading passed to gr.Markdown at the top of the page: a centered <h1>
# containing the SoM logo image followed by the title text.
MARKDOWN = """
<h1 style='text-align: center'>
    <img 
        src='https://som-gpt4v.github.io/website/img/som_logo.png' 
        style='height:50px; display:inline-block'
    />  
    Set-of-Mark (SoM) Prompting Unleashes Extraordinary Visual Grounding in GPT-4V
</h1>
"""

# Build the SAM variant selected by SAM_MODEL_TYPE from the checkpoint file,
# then move it onto the chosen device.
sam = sam_model_registry[SAM_MODEL_TYPE](checkpoint=SAM_CHECKPOINT)
sam = sam.to(device=DEVICE)

# Automatic mask generator wrapping the loaded model.
mask_generator = SamAutomaticMaskGenerator(sam)


def inference(image: np.ndarray, annotation_mode: List[str]) -> np.ndarray:
    """Return the input image unchanged.

    Callback wired to the "Run" button. At present it is a pass-through:
    ``annotation_mode`` is accepted but not used, and no segmentation is run.

    Args:
        image: Image supplied by the input widget.
        annotation_mode: Annotation modes selected in the checkbox group.

    Returns:
        The very same ``image`` object that was passed in.
    """
    return image


# Widgets are instantiated here and placed into the page layout later through
# their .render() methods.

# Source image, delivered to the callback as a numpy array.
image_input = gr.Image(type="numpy", label="Input")

# Multi-select of annotation styles; only "Mark" is ticked initially.
checkbox_annotation_mode = gr.CheckboxGroup(
    label="Annotation Mode",
    choices=["Mark", "Mask", "Box"],
    value=["Mark"],
)

# Result image shown to the user.
image_output = gr.Image(
    type="numpy",
    label="SoM Visual Prompt",
    height=512,
)

run_button = gr.Button("Run")

# Page layout: heading on top, then one row with two columns.
with gr.Blocks() as demo:
    gr.Markdown(MARKDOWN)
    with gr.Row():
        # First column: input image plus a collapsed accordion with the
        # annotation-mode checkboxes.
        with gr.Column():
            image_input.render()
            with gr.Accordion(
                label="Detailed prompt settings (e.g., mark type)",
                open=False,
            ):
                checkbox_annotation_mode.render()
        # Second column: output image and the button that triggers inference.
        with gr.Column():
            image_output.render()
            run_button.render()

    # Clicking "Run" feeds the image and the selected modes to `inference`
    # and shows its return value in the output image.
    run_button.click(
        fn=inference,
        inputs=[image_input, checkbox_annotation_mode],
        outputs=image_output,
    )

# Enable the request queue and start the app; errors are surfaced in the UI.
demo.queue().launch(debug=False, show_error=True)