"""Gradio demo app for PolyFormer visual grounding.

Builds a ``gr.Interface`` around ``demo.visual_grounding`` (a PIL image and a
text box as inputs, two numpy images as outputs) and launches it at module
level, i.e. as soon as this file is run or imported.
"""

# Standard library.
import os
import math
from io import BytesIO
import base64
import re

# Third-party.
# NOTE(review): most of these names are not referenced below; presumably some
# are imported for their side effects (e.g. model/task registration) --
# confirm before pruning. The relative order within each group is kept from
# the original file; only verbatim duplicate imports were dropped.
import torch
import numpy as np
from fairseq import utils, tasks
import cv2
from fairseq import checkpoint_utils
from PIL import Image
from torchvision import transforms
import gradio as gr

# Project-local.
from utils.checkpoint_utils import load_model_ensemble_and_task
from models.polyformer import PolyFormerModel
from utils.eval_utils import eval_step
from tasks.refcoco import RefcocoTask
from demo import visual_grounding

# Text shown on the demo page.
title = "PolyFormer-Visual_Grounding"

description = (
    "Gradio Demo for PolyFormer-Visual_Grounding. Upload your own image or click any one of the examples, "
    "and write a description about a certain object. "
    "Then click \"Submit\" and wait for the result of grounding. For help or to provide feedback, please contact: Hui Ding (@huidin)"
)

# NOTE(review): in the source this literal spans a physical line break with no
# other visible content, so it is preserved here as a single newline. It looks
# like markup may have been lost from this string -- confirm the intended text.
article = "\n"

# Previously used example inputs, kept for reference (currently disabled):
# examples = [['A bear astronaut in the space.jpeg', 'a bear astronaut in the space'],
#             ['A unicorn doing computer vision research.jpeg', 'a unicorn doing computer vision research'],
#             ['pig.jpeg', 'a pig robot preparing a delicious meal'],
#             ['otta.png', 'a gentleman otter in a 19th century portrait'],
#             ['pikachu.jpeg', 'a pikachu fine-dining with a view to the Eiffel Tower'],
#             ['A small cabin on top of a snowy mountain in the style of Disney artstation.jpeg', 'a small cabin on top of a snowy mountain in the style of Disney artstation'],
#             #
#             ]
examples = []

# NOTE(review): `gr.inputs` / `gr.outputs`, a boolean `allow_flagging` and
# `allow_screenshot` belong to the legacy Gradio Interface API -- confirm the
# pinned gradio version still accepts them before upgrading.
io = gr.Interface(
    fn=visual_grounding,
    inputs=[gr.inputs.Image(type='pil'), "textbox"],
    outputs=[
        gr.outputs.Image(label="output", type='numpy'),
        gr.outputs.Image(label="predicted mask", type='numpy'),
    ],
    title=title,
    description=description,
    article=article,
    examples=examples,
    allow_flagging=False,
    allow_screenshot=False,
)

# io.launch(cache_examples=True)
io.launch()