Spaces:

fffiloni
/

x-decoder-video

Paused

App Files Files Community

x-decoder-video / app.py

fffiloni

Create app.py

42f9e3a about 2 years ago

raw

history blame

4.34 kB

	# --------------------------------------------------------
	# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
	# Copyright (c) 2022 Microsoft
	# Licensed under The MIT License [see LICENSE for details]
	# Written by Jianwei Yang (jianwyan@microsoft.com), Xueyan Zou (xueyan@cs.wisc.edu)
	# --------------------------------------------------------

	import os
	# Install the package from the MaureenZOU/detectron2-xyz repository at
	# startup, before the remaining imports below are executed.
	# NOTE(review): os.system does not raise on a non-zero exit status, so a
	# failed install would only surface later as an import error -- confirm
	# this is acceptable. It also runs whichever `python` is first on PATH.
	os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")

	import gradio as gr
	import torch
	import argparse

	from xdecoder.BaseModel import BaseModel
	from xdecoder import build_model
	from utils.distributed import init_distributed
	from utils.arguments import load_opt_from_config_files

	from tasks import *

	def parse_option(argv=None):
	    """Parse the demo's command-line options.

	    Args:
	        argv: Optional list of argument strings to parse. When ``None``
	            (the default) ``sys.argv[1:]`` is parsed, exactly as before.

	    Returns:
	        argparse.Namespace with a ``conf_files`` attribute holding the path
	        of the config file to load.
	    """
	    # add_help=False is kept from the original: no -h/--help option is registered.
	    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
	    parser.add_argument(
	        '--conf_files',
	        default="configs/xdecoder/svlp_focalt_lang.yaml",
	        metavar="FILE",
	        help='path to config file',
	    )
	    return parser.parse_args(argv)

	# ---- build args ----
	# Read the command line, load the options from the selected config file,
	# then pass them through init_distributed and keep whatever it returns.
	args = parse_option()
	opt = init_distributed(load_opt_from_config_files(args.conf_files))

	# META DATA
	# Checkpoint file names, resolved relative to the current working directory.
	pretrained_pth_last = "xdecoder_focalt_last.pt"
	pretrained_pth_novg = "xdecoder_focalt_last_novg.pt"

	# Fetch each checkpoint with wget when it is not on disk yet.
	if not os.path.exists(pretrained_pth_last):
	    os.system("wget https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt")

	if not os.path.exists(pretrained_pth_novg):
	    os.system("wget https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt")


	# ---- build model ----
	# Wrap the built network in BaseModel, load the "last" checkpoint, switch to
	# eval mode and move it to the GPU.
	model_last = (
	    BaseModel(opt, build_model(opt))
	    .from_pretrained(pretrained_pth_last)
	    .eval()
	    .cuda()
	)

	# Call the language encoder once on a placeholder vocabulary with gradients
	# disabled (presumably to initialise its default text embeddings -- confirm).
	with torch.no_grad():
	    model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(
	        ["background", "background"],
	        is_eval=True,
	    )

	'''
	inference model
	'''

	@torch.no_grad()
	def inference(image, instruction, *args, **kwargs):
	    """Run instruction-based editing on one image.

	    Args:
	        image: Input image; must provide ``convert("RGB")`` (the interface
	            below supplies a PIL image).
	        instruction: The instruction text entered by the user.
	        *args, **kwargs: Extra arguments forwarded unchanged to
	            ``referring_inpainting_gpt3``. The interface passes only
	            ``image`` and ``instruction``, so these must stay optional.

	    Returns:
	        Whatever ``referring_inpainting_gpt3`` returns for the given inputs.
	    """
	    image = image.convert("RGB")
	    # Run the model under CUDA autocast with float16.
	    with torch.autocast(device_type='cuda', dtype=torch.float16):
	        return referring_inpainting_gpt3(model_last, image, instruction, *args, **kwargs)

	'''
	launch app
	'''

	# Page title shown at the top of the demo.
	title = "Instructional Image Editing"
	# HTML header for the demo: project links, a usage note and a "duplicate
	# space" badge. The link separators are plain "|" characters; a backslash
	# before them would be an invalid escape sequence in this non-raw string.
	description = """<p style='text-align: center'> <a href='https://x-decoder-vl.github.io/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/pdf/2212.11270.pdf' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='https://youtu.be/wYp6vmyolqE' target='_blank'>Video</a></p>
	<p style='text-align: center; color: red;'> NOTE: This demo is mainly for object-centric instructional image editing! For style transfer please refer to the hero demo <a href='https://huggingface.co/spaces/timbrooks/instruct-pix2pix' target='_blank'>Instruct-Pix2Pix</a></p>
	<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
	<br/>
	<a href="https://huggingface.co/spaces/xdecoder/Instruct-X-Decoder?duplicate=true">
	<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
	</p>
	"""

	# Markdown usage notes listing the supported kinds of edit.
	help_text = """
	This demo is leveraging X-Decoder's fine-grained understanding for instruction-based image editing. You can use it to:
	1. Remove object, e.g., remove the dog in the image
	2. Replace object, e.g., change the sky with a mountain
	"""

	# ---- launch app ----
	# The interface takes a PIL image plus a free-text instruction and shows the
	# edited image. gr.Image replaces the deprecated gr.inputs / gr.outputs
	# namespaces, matching the gr.Textbox already used here.
	inputs = [gr.Image(type='pil'), gr.Textbox(label="instruction")]
	gr.Interface(
	    fn=inference,
	    inputs=inputs,
	    outputs=[
	        gr.Image(type="pil", label="edit result"),
	    ],
	    # Each example row is [image path, instruction], in the order of `inputs`.
	    examples=[
	        ["./images/blue_white_bird.jpg", "change the color of bird's feathers from blue to red."],
	        ["./images/house.jpg", "change the house to a modern one."],
	        ["./images/apples.jpg", "change green apple to a red apple"],
	        ["./images/Furniture_Gateway_02.jpg", "make the sofa to one with leather"],
	        ["./images/girl_and_two_boys.png", "remove the boy with blue backbag"],
	        ["./images/dog.png", "remove the chair"],
	        ["./images/horse.png", "change the sky to mountain"],
	        ["./images/Magritte_TheSonOfMan.jpg", "remove the green apple"],
	    ],
	    title=title,
	    description=description,
	    # A gr.Markdown created outside a Blocks context is never rendered, so the
	    # usage notes are passed as the interface's article instead.
	    article=help_text,
	    allow_flagging='never',
	    cache_examples=True,
	).launch()