Spaces:

koajoel
/

PolyFormer

Running

PolyFormer / app.py

jiang

update demo images

867a29b almost 2 years ago

2.57 kB

	import os
	import torch
	import numpy as np
	from fairseq import utils,tasks
	from utils.checkpoint_utils import load_model_ensemble_and_task
	from models.polyformer import PolyFormerModel
	import cv2

	import torch
	import numpy as np
	from fairseq import utils, tasks
	from fairseq import checkpoint_utils
	from utils.eval_utils import eval_step
	from tasks.refcoco import RefcocoTask
	from models.polyformer import PolyFormerModel
	from PIL import Image
	from torchvision import transforms
	import cv2
	import gradio as gr
	import math
	from io import BytesIO
	import base64
	import re
	from demo import visual_grounding

	# Heading passed to gr.Interface as the page title.
	title = "PolyFormer for Visual Grounding"

	# HTML blurb passed to gr.Interface as the page description: project links,
	# short usage instructions, and a "Duplicate Space" badge.
	# The link separators are plain "|" characters. The previous "\|" is not a
	# valid Python escape sequence: it raises DeprecationWarning (SyntaxWarning on
	# Python 3.12+) and leaves a literal backslash in the string.
	description = """<p style='text-align: center'> <a href='https://polyformer.github.io/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/pdf/2302.07387.pdf' target='_blank'>Paper</a> | <a href='https://github.com/amazon-science/polygon-transformer' target='_blank'>Github Repo</a></p>
	<p style='text-align: left'> Demo of PolyFormer for referring image segmentation and referring expression comprehension. Upload your own image or click any one of the examples, and write a description about a certain object. Then click \"Submit\" and wait for the results.</p>
	<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
	<br/>
	<a href="https://huggingface.co/spaces/koajoel/PolyFormer?duplicate=true">
	<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
	</p>
	"""

	# One-click samples for the demo: each entry is a two-item list of
	# [image path, referring expression], in the same order as the interface's
	# two inputs (image, textbox).
	examples = [
	    [image_path, expression]
	    for image_path, expression in (
	        ('demo/vases.jpg', 'the blue vase on the left'),
	        ('demo/dog.jpg', 'the dog wearing glasses'),
	        ('demo/bear.jpeg', 'a bear astronaut in the space'),
	        ('demo/unicorn.jpeg', 'a unicorn doing computer vision research'),
	        ('demo/pig.jpeg', 'a pig robot preparing a delicious meal'),
	        ('demo/otta.png', 'a gentleman otter in a 19th century portrait'),
	        ('demo/pikachu.jpeg', 'a pikachu fine-dining with a view to the Eiffel Tower'),
	        ('demo/cabin.jpeg', 'a small cabin on top of a snowy mountain in the style of Disney art station'),
	    )
	]
	# Inputs for visual_grounding: an image component created with type='pil'
	# and a plain textbox, in that order.
	_inputs = [gr.inputs.Image(type='pil'), "textbox"]

	# Two image outputs, both created with type='numpy': one labelled "output",
	# one labelled "predicted mask".
	_outputs = [
	    gr.outputs.Image(label="output", type='numpy'),
	    gr.outputs.Image(label="predicted mask", type='numpy'),
	]

	# Build the demo UI around visual_grounding. Flagging, screenshots and
	# example caching are all switched off.
	# NOTE(review): gr.inputs / gr.outputs and allow_screenshot look like legacy
	# Gradio API - confirm against the Gradio version this Space pins before
	# upgrading.
	io = gr.Interface(
	    fn=visual_grounding,
	    inputs=_inputs,
	    outputs=_outputs,
	    title=title,
	    description=description,
	    examples=examples,
	    allow_flagging=False,
	    allow_screenshot=False,
	    cache_examples=False,
	)

	# Launch the interface as soon as this script is executed.
	io.launch()