import os
import sys
import cv2
import json
import numpy as np
from PIL import Image
import gradio as gr

import torch


# Install dependencies at startup: the bundled CLIP fork and detectron2.
sys.path.insert(0, "third_party/CLIP/")
os.system("pip3 install -Ue third_party/CLIP/")

os.system("pip install git+https://github.com/facebookresearch/detectron2.git")


# import some common detectron2 utilities
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog
from detectron2.utils.logger import setup_logger
from detectron2.projects.deeplab import add_deeplab_config

# import Mask2Former project
from mask2former import add_mask_former_config

setup_logger()
logger = setup_logger(name="freeseg")


class Predictor(DefaultPredictor):

    def forward(self, original_image, labels=None):
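        """Run the model on one image array and return its raw predictions.

        Unlike ``DefaultPredictor.__call__``, this variant also forwards the
        user-supplied ``labels`` list to the model, so the open-vocabulary class
        set can change on every request.
        """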
        with torch.no_grad(): 
            # Apply pre-processing to image.
            if self.input_format == "RGB":
                # whether the model expects BGR inputs or RGB
                original_image = original_image[:, :, ::-1]
            height, width = original_image.shape[:2]
            image = self.aug.get_transform(original_image).apply_image(original_image)
            image = torch.as_tensor(image.astype("float32").transpose(2, 0, 1))

            inputs = {"image": image, "height": height, "width": width}
            predictions = self.model([inputs], labels)[0]
            return predictions


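# Build a CPU FreeSeg predictor from the all-task COCO-Stuff config. The config and
# checkpoint paths below are the ones this demo ships with; adjust them if your
# checkout is laid out differently.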
def create_predictor(task_names):
    cfg = get_cfg()
    add_deeplab_config(cfg)
    add_mask_former_config(cfg)
    cfg.merge_from_file("configs/coco-stuff-164k-156/mask2former_R101c_alltask_bs32_60k.yaml")
    cfg.MODEL.WEIGHTS = 'checkpoints/model_demo.pth'
    cfg.MODEL.MASK_FORMER.TEST.SEMANTIC_ON = True
    cfg.MODEL.DEVICE = "cpu"
    cfg.INPUT.TASK_NAME = [task.lower() for task in task_names]
    predictor = Predictor(cfg)

    return predictor

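# Minimal usage sketch (not executed here; "examples/cat.jpg" is one of the images
# bundled with the demo, and the label list is arbitrary open-vocabulary text):
#
#   predictor = create_predictor(["Semantic segmentation."])
#   image_bgr = cv2.imread("examples/cat.jpg")
#   outputs = predictor.forward(image_bgr, ["cat", "grass", "stone", "other"])
#   sem_seg = outputs["sem_seg"].argmax(0)  # HxW tensor of per-pixel label indices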


"""
# FreeSeg Demo
"""

title = "FreeSeg"
description = """
<p style='text-align: center'> <a href='https://freeseg.github.io/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/abs/2303.17225' target='_blank'>Paper</a> | <a href='https://github.com/bytedance/FreeSeg' target='_blank'>Code</a> </p>

Gradio demo for FreeSeg: Unified, Universal and Open-Vocabulary Image Segmentation. \n
You may click one of the examples or upload your own image. \n

"""  # noqa

article = """
<p style='text-align: center'><a href='https://arxiv.org/abs/2303.17225' target='_blank'>FreeSeg: Unified, Universal and Open-Vocabulary Image Segmentation</a> | <a href='https://github.com/bytedance/FreeSeg' target='_blank'>Github Repo</a></p>
"""  # noqa

examples = [
    [
        "examples/cat.jpg",
        "cat, grass, stone, other",
        ["Semantic segmentation."],
    ],
    [
        "examples/bus.jpg",
        "bus, person, road, building, tree, sky, other",
        ["Semantic segmentation.", "Instance segmentation.", "Panoptic segmentation."],
    ]
]



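# Gradio callback: split the comma-separated label string, run the model once, and
# visualise whichever of the three segmentation tasks were requested.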
def inference(image_path, labels, task_list):

    labels = [lbl.strip() for lbl in labels.split(",")]
    predictor = create_predictor(task_list)

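    # Reuse the COCO test metadata, but overwrite the first len(labels) class names
    # so the visualizers display the user-provided open-vocabulary labels.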
    coco_metadata = MetadataCatalog.get("coco_2017_test_full_task")
    coco_metadata.stuff_classes[:len(labels)] = labels

    # Load as RGB (handles palette/greyscale files), then convert to BGR as the rest
    # of the pipeline expects.
    image = Image.open(image_path).convert("RGB")
    image = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
    outputs = predictor.forward(image, labels)

    results = {"sem_seg": None, "ins_seg": None, "pan_seg": None}
    if "Semantic segmentation." in task_list:
        sem_seg_out = outputs["sem_seg"].argmax(0).to("cpu")

        v = Visualizer(image[:, :, ::-1], coco_metadata, scale=0.6, instance_mode=ColorMode.IMAGE)
        semantic_result = v.draw_sem_seg(sem_seg_out, alpha=0.6).get_image()
        
        semantic_result = Image.fromarray(semantic_result)
        results["sem_seg"] = semantic_result

    if "Panoptic segmentation." in task_list:
        coco_metadata.thing_classes[:len(labels)] = labels

        panvis = Visualizer(
                    image[:, :, ::-1],
                    coco_metadata,
                    scale=0.6, 
                    instance_mode=ColorMode.IMAGE
                )
        panoptic_seg, segments_info = outputs["panoptic_seg"]
        
        panvis_output = panvis.draw_panoptic_seg_predictions(
            panoptic_seg.cpu(), segments_info, alpha=0.6
        )
        
        panvis_output = Image.fromarray(panvis_output.get_image())
        results["pan_seg"] = panvis_output
    
    if "Instance segmentation." in task_list:
        insvis = Visualizer(
            image[:, :, ::-1],
            coco_metadata,
            scale=0.6, 
            instance_mode=ColorMode.SEGMENTATION
        )
        instances = outputs["instances"].to(torch.device("cpu"))
        
        insvis_output = insvis.draw_instance_predictions(predictions=instances)

        insvis_output = Image.fromarray(insvis_output.get_image())
        results["ins_seg"] = insvis_output
    
    return results["sem_seg"], results["ins_seg"], results["pan_seg"]

    

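# Blocks UI: an input panel (image, label textbox, task checkboxes), submit/clear
# buttons, one output image per task, and an example gallery.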
with gr.Blocks(title=title) as demo:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
    gr.Markdown(description)
    input_components = []
    output_components = []


    with gr.Row().style(equal_height=True, mobile_collapse=True):
        with gr.Column(scale=3, variant="panel") as input_component_column:
            input_image_gr = gr.inputs.Image(type="filepath")
            labels_gr = gr.inputs.Textbox(default="", label="Class labels")
            task_list_gr = gr.inputs.CheckboxGroup(
                choices=["Semantic segmentation.", "Instance segmentation.", "Panoptic segmentation."],
                default=["Semantic segmentation."],
                label="Task names",
            )
            input_components.extend([input_image_gr, labels_gr, task_list_gr])

    with gr.Row():
        submit_btn = gr.Button("Submit", variant="primary")
        clear_btn = gr.Button("Clear")
        

    with gr.Row():
        with gr.Row(scale=3, variant="panel") as output_component_row:
            output_image_sem_gr = gr.outputs.Image(label="Semantic segmentation", type="pil")
            output_components.append(output_image_sem_gr)

            output_image_ins_gr = gr.outputs.Image(label="Instance segmentation", type="pil")
            output_components.append(output_image_ins_gr)

            output_image_pan_gr = gr.outputs.Image(label="Panoptic segmentation", type="pil")
            output_components.append(output_image_pan_gr)
    
    with gr.Column(scale=2):
        examples_handler = gr.Examples(
            examples=examples,
            inputs=[c for c in input_components if not isinstance(c, gr.State)],
            outputs=[c for c in output_components if not isinstance(c, gr.State)],
            fn=inference,
            cache_examples=torch.cuda.is_available(),
            examples_per_page=5,
        )

    gr.Markdown(article)

    submit_btn.click(
        inference,
        input_components,
        output_components,
        api_name="predict",
        scroll_to_output=True,
    )

    # Reset every input/output client-side and keep the input column visible; the
    # number of returned values now matches the outputs list.
    clear_btn.click(
        None,
        [],
        (input_components + output_components + [input_component_column]),
        _js=f"""() => {json.dumps(
                    [component.cleared_value if hasattr(component, "cleared_value") else None
                     for component in input_components + output_components]
                    + [gr.Column.update(visible=True)]
                )}
                """,
    )

demo.launch()