File size: 4,216 Bytes
971be17
2545b8c
e40c511
 
 
 
 
 
 
 
 
 
 
 
080429a
 
e40c511
 
 
749745d
 
 
e40c511
 
0443d1a
 
e40c511
0443d1a
 
e40c511
 
 
 
 
 
 
 
7cb4a86
 
fd60bd4
 
3c3d474
7cb4a86
 
 
 
 
 
 
 
e40c511
e62892f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65beda1
 
ed16863
65beda1
 
 
 
 
 
 
971be17
b39562c
7f2ed69
b39562c
 
971be17
65beda1
 
e62892f
 
 
e40c511
 
 
 
65beda1
 
e40c511
971be17
e40c511
ad108ae
e40c511
 
 
971be17
e40c511
7cb4a86
 
971be17
7cb4a86
e62892f
 
 
 
 
 
 
 
e40c511
 
4798672
 
d56ec3c
4798672
e40c511
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
# Reference: https://huggingface.co/spaces/haotiz/glip-zeroshot-demo/blob/main/app.py

import requests
import os
from io import BytesIO
from PIL import Image
import numpy as np
from pathlib import Path
import gradio as gr

import warnings

# Keep the demo log readable by silencing library warnings.
warnings.filterwarnings("ignore")

# Build/install the maskrcnn_benchmark C++/CUDA extensions in-place so the
# imports below succeed. NOTE(review): the return code of os.system is not
# checked — a failed build will only surface as an ImportError afterwards.
os.system("python setup.py build develop --user")

from maskrcnn_benchmark.config import cfg
from maskrcnn_benchmark.engine.predictor_glip import GLIPDemo

# --- Model 1: DesCo-GLIP (tiny) ---------------------------------------------
config_file = "configs/pretrain_new/desco_glip.yaml"
weight_file = "MODEL/desco_glip_tiny.pth"

# update the config options with the config file
# manual override some options (single-GPU CUDA inference)
cfg.local_rank = 0
cfg.num_gpus = 1
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

glip_demo = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False
)

# --- Model 2: DesCo-FIBER (base) --------------------------------------------
config_file = "configs/pretrain_new/desco_fiber.yaml"
weight_file = "MODEL/desco_fiber_base.pth"
from copy import deepcopy
# Rebind cfg to a deep copy so the FIBER overrides below do not mutate the
# cfg object already handed to glip_demo above.
cfg = deepcopy(cfg)
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])
fiber_demo = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False
)

# --- Model 3: local GLIP checkpoint 1 ---------------------------------------
config_file = "configs/pretrain_new/desco_glip.yaml"
weight_file = "MODEL/local1.pth"
from copy import deepcopy  # redundant: deepcopy is already imported above
cfg = deepcopy(cfg)
# NOTE(review): this merges the GLIP yaml on top of the FIBER-derived cfg, so
# FIBER-only keys not present in the GLIP yaml carry over — confirm intended.
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])
local_demo1 = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False
)

# --- Model 4: local GLIP checkpoint 2 (same config as model 3) --------------
config_file = "configs/pretrain_new/desco_glip.yaml"
weight_file = "MODEL/local2.pth"
from copy import deepcopy  # redundant: deepcopy is already imported above
cfg = deepcopy(cfg)
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])
local_demo2 = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False
)

# Shared rendering ("aesthetics") options forwarded to run_on_web_image as
# **kwargs; text_size/text_pixel are recomputed per-image inside predict().
athetics_params = {
    "skip_name": False, # whether we overlay the phrase over the box
    "override_color": (0, 90, 190),  # fixed box/text color (BGR)
    "text_size": 1.0,
    "text_pixel": 3,
    "box_alpha": 1.0,
    "box_pixel": 5,
    "text_offset_original": 8, # distance between text and box
}

def predict(image, text, ground_tokens=""):
    """Ground the caption `text` in `image` with all four loaded models.

    Args:
        image: H x W x 3 uint8 numpy array from Gradio (RGB channel order).
        text: caption/description whose phrases should be localized.
        ground_tokens: optional ";"-separated phrases to ground; an empty or
            whitespace-only string means "let the model decide" (None).

    Returns:
        Tuple of four annotated images in RGB order:
        (DesCo-GLIP, DesCo-FIBER, local1, local2).
    """
    import math

    # Scale the overlay text with the image so labels stay legible at any size.
    img_len = min(image.shape[:2])
    athetics_params["text_size"] = math.ceil(img_len / 1000)
    athetics_params["text_pixel"] = math.ceil(img_len / 1000 * 3)

    ground_tokens = None if ground_tokens.strip() == "" else ground_tokens.strip().split(";")

    # The predictors expect BGR input; hoist the channel swap out of the four
    # calls. deepcopy guards against the predictor drawing on the input in place.
    bgr = image[:, :, [2, 1, 0]]
    result, _ = glip_demo.run_on_web_image(deepcopy(bgr), text, 0.5, ground_tokens, **athetics_params)
    fiber_result, _ = fiber_demo.run_on_web_image(deepcopy(bgr), text, 0.5, ground_tokens, **athetics_params)
    local_result1, _ = local_demo1.run_on_web_image(deepcopy(bgr), text, 0.5, ground_tokens, **athetics_params)
    local_result2, _ = local_demo2.run_on_web_image(deepcopy(bgr), text, 0.5, ground_tokens, **athetics_params)

    # BUG FIX: local_result1/local_result2 were previously returned in BGR
    # while the first two results were converted back to RGB, so the last two
    # gallery images displayed with red/blue swapped. Convert all four.
    return (
        result[:, :, [2, 1, 0]],
        fiber_result[:, :, [2, 1, 0]],
        local_result1[:, :, [2, 1, 0]],
        local_result2[:, :, [2, 1, 0]],
    )


# NOTE(review): `image` is never used below — the Interface is built from the
# "image"/"text" string shorthands instead; gr.inputs is also a deprecated
# Gradio API. Candidate for removal.
image = gr.inputs.Image()



# Build the web UI (image + caption + optional grounding tokens in, four
# annotated images out) and launch the server. Blocks at import time.
gr.Interface(
    description="Object Recognition with DesCo (https://github.com/liunian-harold-li/DesCo)",
    fn=predict,
    inputs=["image", "text", "text"],
    outputs=[
        gr.outputs.Image(
            type="pil",
            label="DesCo-GLIP"
        ),
        gr.outputs.Image(
            type="pil",
            label="DesCo-FIBER"
        ),
        gr.outputs.Image(
            type="pil",
            label="local1"
        ),
        gr.outputs.Image(
            type="pil",
            label="local2"
        ),
    ],
    examples=[
        ["./1.jpg", "A clown making a balloon animal for a pretty lady.", "clown"],
        ["./1.jpg", "A clown kicking a soccer ball for a pretty lady.", "clown"],
        ["./2.jpg", "A kind of tool, wooden handle with a round head.", "tool"],
        ["./3.jpg", "Bumblebee, yellow with black accents.", "Bumblebee"],
    ],
    article=Path("docs/intro.md").read_text()
).launch()