Spaces:

hasibzunair
/

peekaboo-demo

Sleeping

App Files Files Community

peekaboo-demo / app.py

hasibzunair

add files

77ac391 5 months ago

raw

history blame

3.81 kB

	import os
	import torch
	import argparse
	import torch.nn as nn
	import torch.nn.functional as F
	import gradio as gr
	import codecs
	import numpy as np
	import cv2

	from PIL import Image
	from model import PeekabooModel
	from misc import load_config
	from torchvision import transforms as T

	# Per-channel normalization applied after ``ToTensor`` in ``inference``
	# (these are the commonly used ImageNet RGB mean / std values).
	NORMALIZE = T.Normalize(
	    mean=(0.485, 0.456, 0.406),
	    std=(0.229, 0.224, 0.225),
	)


	def inference(img_path):
	    """Run Peekaboo on one image and return the image blended with its mask.

	    Relies on the module-level ``model`` and ``device`` objects, which the
	    ``__main__`` section of this script creates before the app is launched.

	    Args:
	        img_path: Filesystem path of the input image (the Gradio input
	            component is configured with ``type="filepath"``).

	    Returns:
	        The array produced by ``cv2.addWeighted``: a 50/50 blend of the RGB
	        input image and the thresholded prediction mask (mask pixels are 255
	        where the sigmoid of the prediction exceeds 0.5, otherwise 0).
	    """
	    # Load the image. ``convert`` forces the decode while the file is open.
	    with open(img_path, "rb") as f:
	        img = Image.open(f).convert("RGB")
	    img_np = np.array(img)

	    # Preprocess: to tensor, normalize, and add a leading batch axis.
	    preprocess = T.Compose([T.ToTensor(), NORMALIZE])
	    img_t = preprocess(img)[None, :, :, :]
	    inputs = img_t.to(device)

	    # Forward step
	    print("Start Peekaboo prediction.")
	    with torch.no_grad():
	        preds = model(inputs, for_eval=True)
	    print("Done Peekaboo prediction.")

	    # Upsample the prediction by the ViT patch size, then crop back to the
	    # input height/width in case the upsampled map is larger than the image.
	    h, w = img_t.shape[-2:]
	    preds_up = F.interpolate(
	        preds,
	        scale_factor=model.vit_patch_size,
	        mode="bilinear",
	        align_corners=False,
	    )[..., :h, :w]
	    mask = (torch.sigmoid(preds_up.detach()) > 0.5).squeeze(0).float()
	    # assumes a single-channel prediction so that squeeze() leaves (h, w)
	    # -- TODO confirm against PeekabooModel
	    mask = mask.cpu().squeeze().numpy()

	    # Overlay predicted mask with input image.
	    # The mask is already binary {0, 1}, so scale it directly. Dividing by
	    # its maximum (as was done before) yields 0/0 = NaN when nothing is
	    # detected, and casting NaN to uint8 is undefined.
	    mask_u8 = (mask * 255).astype(np.uint8)
	    mask_rgb = np.stack([mask_u8, mask_u8, mask_u8], axis=-1)
	    combined_image = cv2.addWeighted(img_np, 0.5, mask_rgb, 0.5, 0)
	    print(f"Output shape is {combined_image.shape}")
	    return combined_image


	if __name__ == "__main__":
	    # Script entry point: parse CLI options, build the Peekaboo model, and
	    # launch the Gradio demo. ``model`` and ``device`` are intentionally left
	    # as module-level names because ``inference`` reads them as globals.
	    parser = argparse.ArgumentParser(
	        description="Evaluation of Peekaboo",
	        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
	    )

	    # NOTE: --img-path and --output-dir are accepted but not used anywhere
	    # in this script; they are kept so existing command lines keep working.
	    parser.add_argument(
	        "--img-path",
	        type=str,
	        default="data/examples/VOC_000030.jpg",
	        help="Image path.",
	    )
	    parser.add_argument(
	        "--model-weights",
	        type=str,
	        default="data/weights/peekaboo_decoder_weights_niter500.pt",
	    )
	    parser.add_argument(
	        "--config",
	        type=str,
	        default="configs/peekaboo_DUTS-TR.yaml",
	    )
	    parser.add_argument(
	        "--output-dir",
	        type=str,
	        default="outputs",
	    )
	    args = parser.parse_args()

	    # Configuration
	    config, _ = load_config(args.config)
	    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

	    # Load the model
	    # NOTE(review): the model is never explicitly moved to ``device`` here,
	    # while ``inference`` moves its inputs there -- presumably PeekabooModel
	    # handles device placement internally; confirm.
	    model = PeekabooModel(
	        vit_model=config.model["pre_training"],
	        vit_arch=config.model["arch"],
	        vit_patch_size=config.model["patch_size"],
	        enc_type_feats=config.peekaboo["feats"],
	    )
	    # Load weights
	    model.decoder_load_weights(args.model_weights)
	    model.eval()
	    print(f"Model {args.model_weights} loaded correctly.")

	    # App
	    title = "PEEKABOO: Hiding Parts of an Image for Unsupervised Object Localization"
	    # Read the description inside a context manager so the handle is closed.
	    with codecs.open("./media/description.html", "r", "utf-8") as description_file:
	        description = description_file.read()
	    # The separator is a plain "|": the previous "\|" was an invalid escape
	    # sequence and rendered a stray backslash in the page.
	    article = (
	        "<p style='text-align: center'>"
	        "<a href='https://arxiv.org/abs/2407.17628' target='_blank'>"
	        "PEEKABOO: Hiding Parts of an Image for Unsupervised Object Localization</a>"
	        " | "
	        "<a href='https://github.com/hasibzunair/peekaboo' target='_blank'>Github</a></p>"
	    )

	    gr.Interface(
	        inference,
	        gr.components.Image(type="filepath", label="Input Image"),
	        gr.components.Image(type="numpy", label="Predicted Output"),
	        examples=[
	            "./data/examples/godzillaxkong.jpeg",
	            "./data/examples/avengers.jpeg",
	            "./data/examples/dinosaur.jpeg",
	            "./data/examples/chitauri.jpeg",
	            "./data/examples/kayak.jpeg",
	        ],
	        title=title,
	        description=description,
	        article=article,
	        # Gradio expects the string form ("never"/"auto"/"manual"); passing a
	        # boolean is deprecated and rejected by newer releases.
	        allow_flagging="never",
	        analytics_enabled=False,
	    ).launch()