# (Page header captured together with this file: "Spaces: Sleeping")
# Reference: https://huggingface.co/spaces/haotiz/glip-zeroshot-demo/blob/main/app.py
import os
import sys
import warnings
from io import BytesIO
from pathlib import Path

import gradio as gr
import numpy as np
import requests
from PIL import Image

warnings.filterwarnings("ignore")

# Run the repository's setup.py before importing maskrcnn_benchmark
# (presumably this is what makes the package importable -- confirm).
# os.system() reports failure only through its return value, so a non-zero
# status is surfaced here instead of being dropped; the script still
# continues, exactly as before, in case the package is already installed.
_build_status = os.system("python setup.py build develop --user")
if _build_status != 0:
    print(
        "WARNING: 'python setup.py build develop --user' exited with "
        f"status {_build_status}",
        file=sys.stderr,
    )

# Kept below the build step on purpose: the original script imports these
# only after the build command has run.
from maskrcnn_benchmark.config import cfg  # noqa: E402
from maskrcnn_benchmark.engine.predictor_glip import GLIPDemo  # noqa: E402

# ---- DesCo-GLIP model -------------------------------------------------
config_file = "configs/pretrain_new/desco_glip.yaml"
weight_file = "MODEL/desco_glip_tiny.pth"

# update the config options with the config file
# manual override some options
cfg.local_rank = 0
cfg.num_gpus = 1
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
# NOTE(review): the device is hard-coded to CUDA; there is no CPU fallback.
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

glip_demo = GLIPDemo(
    cfg,
    min_image_size=800,
    confidence_threshold=0.7,
    show_mask_heatmaps=False,
)
from copy import deepcopy

# ---- DesCo-FIBER model ------------------------------------------------
# Same construction as the GLIP predictor, with the FIBER config/weights.
config_file = "configs/pretrain_new/desco_fiber.yaml"
weight_file = "MODEL/desco_fiber_base.pth"

# Rebind `cfg` to a deep copy before merging, so the config object that was
# already passed to the GLIP predictor is left untouched.  The copy starts
# from the GLIP settings; the FIBER file and overrides are merged on top.
cfg = deepcopy(cfg)
cfg.merge_from_file(config_file)
cfg.merge_from_list(["MODEL.WEIGHT", weight_file])
cfg.merge_from_list(["MODEL.DEVICE", "cuda"])

_fiber_demo_kwargs = {
    "min_image_size": 800,
    "confidence_threshold": 0.7,
    "show_mask_heatmaps": False,
}
fiber_demo = GLIPDemo(cfg, **_fiber_demo_kwargs)
# Drawing options, expanded as keyword arguments into every
# run_on_web_image() call made by predict().
athetics_params = dict(
    skip_name=False,  # whether we overlay the phrase over the box
    override_color=(0, 0, 0),  # box color, default is white
    text_size=1.0,
    text_pixel=3,
    box_alpha=1.0,
    box_pixel=5,
    text_offset_original=8,  # distance between text and box
)
def predict(image, text, ground_tokens=""):
    """Run the GLIP and FIBER predictors on one image and return both renderings.

    Args:
        image: Array indexed as ``image[:, :, channel]`` with at least three
            channels (assumed to be the RGB array Gradio supplies -- confirm).
        text: Caption / description passed unchanged to both predictors.
        ground_tokens: Optional ``";"``-separated list of phrases.  Each piece
            is stripped of surrounding whitespace and empty pieces are
            dropped; if nothing remains, ``None`` is passed to the predictors.

    Returns:
        A ``(glip_image, fiber_image)`` tuple: each predictor's output with
        its channel order flipped back to that of the input.

    Raises:
        ValueError: If ``image`` is ``None`` (no image was provided).
    """
    if image is None:
        raise ValueError("predict() requires an input image, but got None")

    # "a; b;" must become ["a", "b"], not ["a", " b", ""]; also tolerate None.
    pieces = (piece.strip() for piece in (ground_tokens or "").split(";"))
    ground_tokens = [piece for piece in pieces if piece] or None

    channel_flip = [2, 1, 0]
    rendered = []
    for demo in (glip_demo, fiber_demo):
        # Indexing with an integer list always yields a new array, so each
        # predictor receives its own buffer without an extra deepcopy.
        flipped_input = image[:, :, channel_flip]
        # 0.5 is the third positional argument of run_on_web_image
        # (presumably its score threshold -- confirm against the predictor).
        drawn, _ = demo.run_on_web_image(
            flipped_input, text, 0.5, ground_tokens, **athetics_params
        )
        rendered.append(drawn[:, :, channel_flip])

    glip_image, fiber_image = rendered
    return glip_image, fiber_image
# NOTE(review): this component is created but never handed to the Interface
# below, whose inputs use the "image" shortcut string instead.
image = gr.inputs.Image()

# One output panel per predictor, in the order predict() returns them.
_output_panels = [
    gr.outputs.Image(type="pil", label=panel_label)
    for panel_label in ("DesCo-GLIP", "DesCo-FIBER")
]

# Each example row is [image path, caption, ground tokens].
_example_rows = [
    ["./1.jpg", "A clown making a balloon animal for a pretty lady.", "clown"],
    ["./1.jpg", "A clown kicking a soccer ball for a pretty lady.", "clown"],
    ["./2.jpg", "A kind of tool, wooden handle with a round head.", "tool"],
    ["./3.jpg", "Bumblebee, yellow with black accents.", "Bumblebee"],
]

# Text shown with the demo is read from docs/intro.md at start-up.
_article_text = Path("docs/intro.md").read_text()

gr.Interface(
    description="Object Recognition with DesCo (https://github.com/liunian-harold-li/DesCo)",
    fn=predict,
    inputs=["image", "text", "text"],
    outputs=_output_panels,
    examples=_example_rows,
    article=_article_text,
).launch()