Spaces:

WalidBouss
/

GEM

Running

App Files Files Community

GEM / app.py

WalidBouss

Initial commit :tada:

be1ec96 6 months ago

raw history blame contribute delete

No virus

2.87 kB

	from PIL import Image
	import numpy as np
	import cv2 as cv2
	import torch
	import requests

	import gradio as gr

	import gem


	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	# OpenCLIP
	model_name = 'ViT-B-16-quickgelu'
	pretrained = 'metaclip_400m'
	preprocess = gem.get_gem_img_transform()
	# global gem_model
	gem_model = gem.create_gem_model(model_name=model_name, pretrained=pretrained, device=device)
	image_source = "image"
	_MODELS = {
	"OpenAI": ('ViT-B-16', 'openai'),
	"MetaCLIP": ('ViT-B-16-quickgelu', 'metaclip_400m'),
	"OpenCLIP": ('ViT-B-16', 'laion400m_e32')
	}

	def change_weights(pretrained_weights):
	""" Handle changing model's weights triggered by a Dropdown module change."""
	curr_model = pretrained_weights
	_new_model = _MODELS[pretrained_weights]
	print(_new_model)
	global gem_model
	gem_model = gem.create_gem_model(model_name=_new_model[0], pretrained=_new_model[1], device=device)

	def change_to_url(url):
	img_pil = Image.open(requests.get(url, stream=True).raw).convert('RGB')
	return img_pil

	def viz_func(url, image, text, model_weights):
	image_torch = preprocess(image).unsqueeze(0).to(device)
	with torch.no_grad():
	logits = gem_model(image_torch, [text])
	logits = logits[0].detach().cpu().numpy()

	img_cv = cv2.cvtColor(np.array(image.resize((448, 448))), cv2.COLOR_RGB2BGR)
	logit_cs_viz = (logits * 255).astype('uint8')
	heat_maps_cs = [cv2.applyColorMap(logit, cv2.COLORMAP_JET) for logit in logit_cs_viz]

	vizs = [0.4 * img_cv + 0.6 * heat_map for heat_map in heat_maps_cs]
	vizs = [cv2.cvtColor(viz.astype('uint8'), cv2.COLOR_BGR2RGB) for viz in vizs]
	return vizs[0]

	inputs = [
	gr.Textbox(label="url to the image", ),
	gr.Image(type="pil"),
	gr.Textbox(label="Text Prompt"),
	gr.Dropdown(["OpenAI", "MetaCLIP", "OpenCLIP"], label="Pretrained Weights", value="MetaCLIP",
	info='It can take a few second for the model to be updated.'),
	]

	with gr.Blocks() as demo:
	inputs[-1].change(fn=change_weights, inputs=[inputs[-1]])
	inputs[0].change(fn=change_to_url, outputs=inputs[1], inputs=inputs[0])

	interact = gr.Interface(
	title="GEM: Grounding Everything Module (link to paper/code)",
	description="Grounding Everything: Emerging Localization Properties in Vision-Language Transformers",
	fn=viz_func,
	inputs=inputs,
	outputs=["image"],
	)

	gr.Examples(
	[
	["assets/cats_remote_control.jpeg", "cat"],
	["assets/cats_remote_control.jpeg", "remote control"],
	["assets/elon_jeff_mark.jpeg", "elon musk"],
	["assets/elon_jeff_mark.jpeg", "mark zuckerberg"],
	["assets/elon_jeff_mark.jpeg", "jeff bezos"],
	],
	[inputs[1], inputs[2]]
	)

	# demo.launch(server_port=5152)
	demo.launch()