File size: 9,221 Bytes
b6396ac
 
 
 
 
 
 
f0a46b7
b6396ac
 
 
63c1053
1c48e2c
b6396ac
 
 
 
 
 
 
 
 
 
 
f7c79e5
b6396ac
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
import os 
import sys

os.system("pip install gdown")

os.system("pip install imutils")

os.system("python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'")

os.system("pip install git+https://github.com/cocodataset/panopticapi.git")

os.system("python fcclip/modeling/pixel_decoder/ops/setup.py build install")

import gradio as gr
# check pytorch installation: 
from detectron2.utils.logger import setup_logger
from contextlib import ExitStack
# import some common libraries
import numpy as np
import cv2
import torch
import itertools
# import some common detectron2 utilities
from detectron2.config import get_cfg
from detectron2.utils.visualizer import ColorMode, random_color
from detectron2.data import MetadataCatalog
from detectron2.projects.deeplab import add_deeplab_config


coco_metadata = MetadataCatalog.get("coco_2017_val_panoptic")

# import FCCLIP project
from fcclip import add_maskformer2_config, add_fcclip_config
from demo.predictor import DefaultPredictor, OpenVocabVisualizer
from PIL import Image
import imutils
import json

setup_logger()
logger = setup_logger(name="fcclip")

cfg = get_cfg()
cfg.MODEL.DEVICE='cpu'
add_deeplab_config(cfg)
add_maskformer2_config(cfg)
add_fcclip_config(cfg)
cfg.merge_from_file("configs/coco/panoptic-segmentation/fcclip/fcclip_convnext_large_eval_ade20k.yaml")
os.system("gdown 1-91PIns86vyNaL3CzMmDD39zKGnPMtvj")
cfg.MODEL.WEIGHTS = './fcclip_cocopan.pth'
cfg.MODEL.KMAX_DEEPLAB.TEST.SEMANTIC_ON = False
cfg.MODEL.KMAX_DEEPLAB.TEST.INSTANCE_ON = False
cfg.MODEL.KMAX_DEEPLAB.TEST.PANOPTIC_ON = True
predictor = DefaultPredictor(cfg)

# def inference(img):
#     im = cv2.imread(img)
#     #im = imutils.resize(im, width=512)
#     outputs = predictor(im)
#     v = OpenVocabVisualizer(im[:, :, ::-1], coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW)
#     panoptic_result = v.draw_panoptic_seg(outputs["panoptic_seg"][0].to("cpu"), outputs["panoptic_seg"][1]).get_image()
#     return Image.fromarray(np.uint8(panoptic_result)).convert('RGB')
    

title = "FC-CLIP"
description = """Gradio demo for FC-CLIP. To use it, simply upload your image, or click one of the examples to load them. FC-CLIP could perform open vocabulary segmentation, you may input more classes (separate by comma).
The expected format is 'a1,a2;b1,b2', where a1,a2 are synonyms vocabularies for the first class. 
The first word will be displayed as the class name.Read more at the links below."""

article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2207.04044' target='_blank'>kMaX-DeepLab</a> | <a href='https://github.com/google-research/deeplab2' target='_blank'>Github Repo</a></p>"

examples = [
    [
        "demo/examples/coco.jpg",
        "black pickup truck, pickup truck; blue sky, sky",
        ["COCO (133 categories)", "ADE (150 categories)", "LVIS (1203 categories)"],
    ],
    [
        "demo/examples/ade.jpg",
        "luggage, suitcase, baggage;handbag",
        ["ADE (150 categories)"],
    ],
    [
        "demo/examples/ego4d.jpg",
        "faucet, tap; kitchen paper, paper towels",
        ["COCO (133 categories)"],
    ],
]


coco_metadata = MetadataCatalog.get("openvocab_coco_2017_val_panoptic_with_sem_seg")
ade20k_metadata = MetadataCatalog.get("openvocab_ade20k_panoptic_val")
lvis_classes = open("./fcclip/data/datasets/lvis_1203_with_prompt_eng.txt", 'r').read().splitlines()
lvis_classes = [x[x.find(':')+1:] for x in lvis_classes]
lvis_colors = list(
    itertools.islice(itertools.cycle(coco_metadata.stuff_colors), len(lvis_classes))
)
# rerrange to thing_classes, stuff_classes
coco_thing_classes = coco_metadata.thing_classes
coco_stuff_classes = [x for x in coco_metadata.stuff_classes if x not in coco_thing_classes]
coco_thing_colors = coco_metadata.thing_colors
coco_stuff_colors = [x for x in coco_metadata.stuff_colors if x not in coco_thing_colors]
ade20k_thing_classes = ade20k_metadata.thing_classes
ade20k_stuff_classes = [x for x in ade20k_metadata.stuff_classes if x not in ade20k_thing_classes]
ade20k_thing_colors = ade20k_metadata.thing_colors
ade20k_stuff_colors = [x for x in ade20k_metadata.stuff_colors if x not in ade20k_thing_colors]

def build_demo_classes_and_metadata(vocab, label_list):
    extra_classes = []

    if vocab:
        for words in vocab.split(";"):
            extra_classes.append([word.strip() for word in words.split(",")])
    extra_colors = [random_color(rgb=True, maximum=1) for _ in range(len(extra_classes))]

    demo_thing_classes = extra_classes
    demo_stuff_classes = []
    demo_thing_colors = extra_colors
    demo_stuff_colors = []

    if any("COCO" in label for label in label_list):
        demo_thing_classes += coco_thing_classes
        demo_stuff_classes += coco_stuff_classes
        demo_thing_colors += coco_thing_colors
        demo_stuff_colors += coco_stuff_colors
    if any("ADE" in label for label in label_list):
        demo_thing_classes += ade20k_thing_classes
        demo_stuff_classes += ade20k_stuff_classes
        demo_thing_colors += ade20k_thing_colors
        demo_stuff_colors += ade20k_stuff_colors
    if any("LVIS" in label for label in label_list):
        demo_thing_classes += lvis_classes
        demo_thing_colors += lvis_colors

    MetadataCatalog.pop("fcclip_demo_metadata", None)
    demo_metadata = MetadataCatalog.get("fcclip_demo_metadata")
    demo_metadata.thing_classes = [c[0] for c in demo_thing_classes]
    demo_metadata.stuff_classes = [
        *demo_metadata.thing_classes,
        *[c[0] for c in demo_stuff_classes],
    ]
    demo_metadata.thing_colors = demo_thing_colors
    demo_metadata.stuff_colors = demo_thing_colors + demo_stuff_colors
    demo_metadata.stuff_dataset_id_to_contiguous_id = {
        idx: idx for idx in range(len(demo_metadata.stuff_classes))
    }
    demo_metadata.thing_dataset_id_to_contiguous_id = {
        idx: idx for idx in range(len(demo_metadata.thing_classes))
    }

    demo_classes = demo_thing_classes + demo_stuff_classes

    return demo_classes, demo_metadata


def inference(image_path, vocab, label_list):

    logger.info("building class names")
    demo_classes, demo_metadata = build_demo_classes_and_metadata(vocab, label_list)
    predictor.set_metadata(demo_metadata)

    im = cv2.imread(image_path)
    outputs = predictor(im)
    v = OpenVocabVisualizer(im[:, :, ::-1], demo_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW)
    panoptic_result = v.draw_panoptic_seg(outputs["panoptic_seg"][0].to("cpu"), outputs["panoptic_seg"][1]).get_image()
    return Image.fromarray(np.uint8(panoptic_result)).convert('RGB')

    
with gr.Blocks(title=title) as demo:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>" + title + "</h1>")
    gr.Markdown(description)
    input_components = []
    output_components = []

    with gr.Row():
        output_image_gr = gr.outputs.Image(label="Panoptic Segmentation", type="pil")
        output_components.append(output_image_gr)

    with gr.Row().style(equal_height=True, mobile_collapse=True):
        with gr.Column(scale=3, variant="panel") as input_component_column:
            input_image_gr = gr.inputs.Image(type="filepath")
            extra_vocab_gr = gr.inputs.Textbox(default="", label="Extra Vocabulary")
            category_list_gr = gr.inputs.CheckboxGroup(
                choices=["COCO (133 categories)", "ADE (150 categories)", "LVIS (1203 categories)"],
                default=["COCO (133 categories)", "ADE (150 categories)", "LVIS (1203 categories)"],
                label="Category to use",
            )
            input_components.extend([input_image_gr, extra_vocab_gr, category_list_gr])

        with gr.Column(scale=2):
            examples_handler = gr.Examples(
                examples=examples,
                inputs=[c for c in input_components if not isinstance(c, gr.State)],
                outputs=[c for c in output_components if not isinstance(c, gr.State)],
                fn=inference,
                cache_examples=torch.cuda.is_available(),
                examples_per_page=5,
            )
            with gr.Row():
                clear_btn = gr.Button("Clear")
                submit_btn = gr.Button("Submit", variant="primary")

    gr.Markdown(article)

    submit_btn.click(
        inference,
        input_components,
        output_components,
        api_name="predict",
        scroll_to_output=True,
    )

    clear_btn.click(
        None,
        [],
        (input_components + output_components + [input_component_column]),
        _js=f"""() => {json.dumps(
                    [component.cleared_value if hasattr(component, "cleared_value") else None
                     for component in input_components + output_components] + (
                        [gr.Column.update(visible=True)]
                    )
                    + ([gr.Column.update(visible=False)])
                )}
                """,
    )

demo.launch()


# gr.Interface(inference, inputs=gr.inputs.Image(type="filepath"), outputs=gr.outputs.Image(label="Panoptic segmentation",type="pil"), title=title,
#     description=description,
#     article=article,
#     examples=examples).launch(enable_queue=True)