Spaces:

sled-umich
/

OctoBERT

Sleeping

App Files Files Community

OctoBERT / app.py

Jiayi-Pan

Update app.py

1f4c39e over 1 year ago

raw

history blame

1.91 kB

	import torch, PIL
	import gradio as gr

	# Text shown by the Gradio interface built at the bottom of this file.
	title = "OctoBERT"
	description = "Interactive Demo for OctoBERT. This base model is trained only on Flickr-30k."

	# Example inputs offered in the UI. Each row is [image filename, caption];
	# captions contain one or more ``<mask>`` placeholders.
	# NOTE(review): the image files are presumably shipped next to this script
	# -- confirm they exist in the deployment.
	examples = [
	    [
	        "swing.jpg",
	        "The woman stands outdoors, next to a child in a <mask>.",
	    ],
	    [
	        "tennis.jpg",
	        "A woman in blue shorts and white shirt holds a tennis racket on a blue <mask> court.",
	    ],
	    [
	        "birthday.jpg",
	        "The smiling <mask> is celebrating her <mask> party with friends, surrounded by balloons and a <mask> with candles.",
	    ],
	    [
	        "skate.jpg",
	        "A person in a rainbow colored snowsuit is snowboarding down a <mask> slope.",
	    ],
	    [
	        "street.jpg",
	        "A man with <mask> plays with a little girl while walking down the street, while an Asian woman walks ahead of them.",
	    ],
	    [
	        "dog.jpg",
	        "A black dog stands on a <mask>, green fields behind him.",
	    ],
	]

	# Run on the GPU when one is visible to PyTorch, otherwise on the CPU.
	if torch.cuda.is_available():
	    device = "cuda"
	else:
	    device = "cpu"

	# The hub entry point returns five objects: the model plus the helpers used
	# for image preprocessing, tokenization, output post-processing and plotting.
	# ``force_reload=True`` is passed so the hub repository is fetched again on
	# every start instead of reusing a cached copy.
	# For local development the same entry point can instead be loaded from a
	# checkout in the current directory:
	#   torch.hub.load('.', 'flickr_base_model', source='local')
	(
	    model,
	    img_transform,
	    tokenizer,
	    post_processor,
	    plot_results,
	) = torch.hub.load(
	    "Jiayi-Pan/RefCloze_Pub",
	    "flickr_base_model",
	    force_reload=True,
	)
	model = model.to(device)

	def plot_inference(img, caption):
	    """Run the model on one image/caption pair and render the result.

	    Args:
	        img: Input image. The Gradio input in this file is configured with
	            ``type="pil"``, so this is presumably a PIL image -- confirm
	            before calling from anywhere else.
	        caption: Caption text. The bundled examples mark the words to be
	            predicted with ``<mask>``.

	    Returns:
	        A ``(vis, cap)`` pair: the value returned by ``plot_results`` and
	        the ``'cap'`` entry of the post-processed model outputs.
	    """
	    # Preprocess the image, move it to the active device, then add a leading
	    # axis of size 1 (assumed to be the batch axis the model expects).
	    image_tensor = img_transform(img).to(device)
	    image_batch = image_tensor.unsqueeze(0)

	    # Tokenize the caption into PyTorch tensors on the same device.
	    encoded_caption = tokenizer(caption, return_tensors="pt").to(device)

	    # Inference only: no gradients are needed for the forward pass.
	    with torch.no_grad():
	        raw_outputs = model(image_batch, encoded_caption, one_pass=True)

	    predictions = post_processor(raw_outputs, img, tokenizer)
	    # NOTE(review): save_path="numpy_array" presumably makes plot_results
	    # return the figure as an array instead of writing a file -- confirm.
	    rendered = plot_results(img, predictions, save_path="numpy_array")
	    return rendered, predictions["cap"]


	# Build and start the web demo: an image plus a caption go in, the rendered
	# visualization and the predicted words come out.
	#
	# Components come from the top-level ``gr`` namespace. The older
	# ``gr.inputs.*`` / ``gr.outputs.*`` classes were deprecated in Gradio 3 and
	# removed in Gradio 4, so relying on them breaks the app on current releases.
	demo = gr.Interface(
	    fn=plot_inference,
	    inputs=[
	        gr.Image(type="pil", label="Input"),
	        gr.Textbox(label="input text"),
	    ],
	    outputs=[
	        gr.Image(type="numpy", label="Output"),
	        gr.Textbox(label="Predicted Words"),
	    ],
	    title=title,
	    description=description,
	    examples=examples,
	    # Ask Gradio to cache the outputs for the bundled examples.
	    cache_examples=True,
	)
	demo.launch()