Spaces:
Paused
Paused
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,110 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# --------------------------------------------------------
|
2 |
+
# X-Decoder -- Generalized Decoding for Pixel, Image, and Language
|
3 |
+
# Copyright (c) 2022 Microsoft
|
4 |
+
# Licensed under The MIT License [see LICENSE for details]
|
5 |
+
# Written by Jianwei Yang (jianwyan@microsoft.com), Xueyan Zou (xueyan@cs.wisc.edu)
|
6 |
+
# --------------------------------------------------------
|
7 |
+
|
8 |
+
import os
|
9 |
+
os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")
|
10 |
+
|
11 |
+
import gradio as gr
|
12 |
+
import torch
|
13 |
+
import argparse
|
14 |
+
|
15 |
+
from xdecoder.BaseModel import BaseModel
|
16 |
+
from xdecoder import build_model
|
17 |
+
from utils.distributed import init_distributed
|
18 |
+
from utils.arguments import load_opt_from_config_files
|
19 |
+
|
20 |
+
from tasks import *
|
21 |
+
|
22 |
+
def parse_option():
    """Parse command-line options for the demo.

    Returns:
        argparse.Namespace with a single attribute, ``conf_files`` — path to
        the X-Decoder YAML config (defaults to the Focal-T language config).
    """
    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
    parser.add_argument('--conf_files', default="configs/xdecoder/svlp_focalt_lang.yaml", metavar="FILE", help='path to config file', )
    # parse_known_args tolerates extra argv entries injected by hosting
    # environments (Spaces launchers, test runners); parse_args would abort
    # the whole app with "unrecognized arguments".
    args, _ = parser.parse_known_args()

    return args
|
28 |
+
|
29 |
+
'''
build args
'''
# Parse CLI options once at import time; the resulting config object drives
# every later build step in this script.
args = parse_option()
opt = load_opt_from_config_files(args.conf_files)
# NOTE(review): presumably fills in rank/device fields for a single-process
# run — confirm against utils.distributed.init_distributed.
opt = init_distributed(opt)

# META DATA
# Checkpoint filenames resolved relative to the working directory
# (os.path.join with a single argument is just the bare filename).
pretrained_pth_last = os.path.join("xdecoder_focalt_last.pt")
pretrained_pth_novg = os.path.join("xdecoder_focalt_last_novg.pt")

# Download the released checkpoints on first run only.
# NOTE(review): os.system + wget assumes a POSIX host with wget installed and
# ignores the exit status — a failed download is silent here.
if not os.path.exists(pretrained_pth_last):
    os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt"))

if not os.path.exists(pretrained_pth_novg):
    os.system("wget {}".format("https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt"))
|
45 |
+
|
46 |
+
|
47 |
+
'''
build model
'''
# Build X-Decoder, load the released "last" weights, and move it to GPU in
# eval mode. NOTE(review): the "novg" checkpoint downloaded above is never
# loaded in this script — confirm whether it is still needed.
model_last = BaseModel(opt, build_model(opt)).from_pretrained(pretrained_pth_last).eval().cuda()

# Warm up the language encoder by computing text embeddings for the
# background class under no_grad (inference only, no autograd state kept).
# NOTE(review): "background" is passed twice — looks intentional upstream;
# verify against the X-Decoder lang_encoder API.
with torch.no_grad():
    model_last.model.sem_seg_head.predictor.lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
|
54 |
+
|
55 |
+
'''
inference model
'''

@torch.no_grad()
def inference(image, instruction, *args, **kwargs):
    """Apply an instruction-based edit to *image*.

    The PIL image is normalized to RGB, then the referring-inpainting
    pipeline runs under fp16 autocast on CUDA. Extra positional/keyword
    arguments are forwarded untouched to the pipeline.
    """
    rgb_image = image.convert("RGB")
    with torch.autocast(device_type='cuda', dtype=torch.float16):
        edited = referring_inpainting_gpt3(
            model_last, rgb_image, instruction, *args, **kwargs)
    return edited
|
64 |
+
|
65 |
+
'''
launch app
'''

# UI copy: page title and HTML description (links to project page, paper,
# repo, video, and a duplicate-space badge).
title = "Instructional Image Editing"
description = """<p style='text-align: center'> <a href='https://x-decoder-vl.github.io/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/pdf/2212.11270.pdf' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='https://youtu.be/wYp6vmyolqE' target='_blank'>Video</a></p>
<p style='text-align: center; color: red;'> NOTE: This demo is mainly for object-centric instructional image editing! For style transfer please refer to the hero demo <a href='https://huggingface.co/spaces/timbrooks/instruct-pix2pix' target='_blank'>Instruct-Pix2Pix</a></p>
<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
<br/>
<a href="https://huggingface.co/spaces/xdecoder/Instruct-X-Decoder?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""

help_text = """
This demo is leveraging X-Decoder's fine-grained understanding for instruction-based image editing. You can use it to:
1. Remove object, e.g., remove the dog in the image
2. Replace object, e.g., change the sky with a mountain
"""

# NOTE(review): gr.Markdown called outside any Blocks/Interface layout —
# likely a no-op; confirm whether help_text was meant to go into the
# Interface (e.g. via article=).
gr.Markdown(help_text)

# NOTE(review): gr.inputs.Image / gr.outputs.Image are the legacy gradio 2.x
# component API, deprecated in gradio 3.x — verify the pinned gradio version.
inputs = [gr.inputs.Image(type='pil'), gr.Textbox(label="instruction")]
# Build the demo UI and start the server (blocks until shutdown).
# cache_examples=True runs inference on every example at startup.
gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=[
        gr.outputs.Image(
        type="pil",
        label="edit result"),
    ],
    examples=[
        ["./images/blue_white_bird.jpg", "change the color of bird's feathers from blue to red."],
        ["./images/house.jpg", "change the house to a modern one."],
        ["./images/apples.jpg", "change green apple to a red apple"],
        ["./images/Furniture_Gateway_02.jpg", "make the sofa to one with leather"],
        ["./images/girl_and_two_boys.png", "remove the boy with blue backbag"],
        ["./images/dog.png", "remove the chair"],
        ["./images/horse.png", "change the sky to mountain"],
        ["./images/Magritte_TheSonOfMan.jpg", "remove the green apple"]
    ],
    title=title,
    description=description,
    allow_flagging='never',
    cache_examples=True,
    ).launch()
|