fffiloni commited on
Commit
42f9e3a
1 Parent(s): 38aa98a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +110 -0
app.py ADDED
@@ -0,0 +1,110 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # --------------------------------------------------------
2
+ # X-Decoder -- Generalized Decoding for Pixel, Image, and Language
3
+ # Copyright (c) 2022 Microsoft
4
+ # Licensed under The MIT License [see LICENSE for details]
5
+ # Written by Jianwei Yang (jianwyan@microsoft.com), Xueyan Zou (xueyan@cs.wisc.edu)
6
+ # --------------------------------------------------------
7
+
8
+ import os
9
+ os.system("python -m pip install git+https://github.com/MaureenZOU/detectron2-xyz.git")
10
+
11
+ import gradio as gr
12
+ import torch
13
+ import argparse
14
+
15
+ from xdecoder.BaseModel import BaseModel
16
+ from xdecoder import build_model
17
+ from utils.distributed import init_distributed
18
+ from utils.arguments import load_opt_from_config_files
19
+
20
+ from tasks import *
21
+
22
def parse_option(argv=None):
    """Parse command-line options for the X-Decoder demo.

    Parameters
    ----------
    argv : list[str] | None
        Argument list to parse. ``None`` (the default) falls back to
        ``sys.argv[1:]``, preserving the original call behavior while
        allowing explicit argument lists (e.g. in tests or when the
        hosting process injects its own CLI flags).

    Returns
    -------
    argparse.Namespace
        Namespace with a single ``conf_files`` attribute (path to the
        model YAML config).
    """
    parser = argparse.ArgumentParser('X-Decoder All-in-One Demo', add_help=False)
    parser.add_argument('--conf_files', default="configs/xdecoder/svlp_focalt_lang.yaml", metavar="FILE", help='path to config file', )
    args = parser.parse_args(argv)

    return args
28
+
29
'''
build args
'''
args = parse_option()
opt = load_opt_from_config_files(args.conf_files)
opt = init_distributed(opt)

# META DATA: released checkpoint files, expected next to the app.
pretrained_pth_last = os.path.join("xdecoder_focalt_last.pt")
pretrained_pth_novg = os.path.join("xdecoder_focalt_last_novg.pt")

# Download missing checkpoints with the stdlib instead of shelling out to
# `wget`: os.system() ignored the exit status, so a missing wget binary or a
# failed download went unnoticed until model loading crashed later.
# urlretrieve raises on failure, surfacing the problem at the right place.
from urllib.request import urlretrieve

if not os.path.exists(pretrained_pth_last):
    urlretrieve(
        "https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last.pt",
        pretrained_pth_last,
    )

if not os.path.exists(pretrained_pth_novg):
    urlretrieve(
        "https://projects4jw.blob.core.windows.net/x-decoder/release/xdecoder_focalt_last_novg.pt",
        pretrained_pth_novg,
    )
46
+
47
'''
build model
'''
# Assemble the X-Decoder backbone, load the released weights, then switch
# to inference mode on the GPU.
_core = build_model(opt)
model_last = BaseModel(opt, _core).from_pretrained(pretrained_pth_last)
model_last = model_last.eval().cuda()

# Warm up the language encoder with the "background" class embeddings once,
# up front; no gradients are needed for this.
with torch.no_grad():
    _lang_encoder = model_last.model.sem_seg_head.predictor.lang_encoder
    _lang_encoder.get_text_embeddings(["background", "background"], is_eval=True)
55
'''
inference model
'''

@torch.no_grad()
def inference(image, instruction, *args, **kwargs):
    """Run instruction-driven editing on a PIL image.

    Normalizes the input to RGB, then delegates to
    ``referring_inpainting_gpt3`` under fp16 autocast on CUDA; any extra
    positional/keyword arguments are forwarded unchanged.
    """
    rgb_image = image.convert("RGB")
    autocast_ctx = torch.autocast(device_type='cuda', dtype=torch.float16)
    with autocast_ctx:
        result = referring_inpainting_gpt3(model_last, rgb_image, instruction, *args, **kwargs)
    return result
64
+
65
'''
launch app
'''

title = "Instructional Image Editing"
description = """<p style='text-align: center'> <a href='https://x-decoder-vl.github.io/' target='_blank'>Project Page</a> | <a href='https://arxiv.org/pdf/2212.11270.pdf' target='_blank'>Paper</a> | <a href='https://github.com/microsoft/X-Decoder' target='_blank'>Github Repo</a> | <a href='https://youtu.be/wYp6vmyolqE' target='_blank'>Video</a></p>
<p style='text-align: center; color: red;'> NOTE: This demo is mainly for object-centric instructional image editing! For style transfer please refer to the hero demo <a href='https://huggingface.co/spaces/timbrooks/instruct-pix2pix' target='_blank'>Instruct-Pix2Pix</a></p>
<p>For faster inference without waiting in queue, you may duplicate the space and upgrade to GPU in settings.
<br/>
<a href="https://huggingface.co/spaces/xdecoder/Instruct-X-Decoder?duplicate=true">
<img style="margin-top: 0em; margin-bottom: 0em" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
</p>
"""

help_text = """
This demo is leveraging X-Decoder's fine-grained understanding for instruction-based image editing. You can use it to:
1. Remove object, e.g., remove the dog in the image
2. Replace object, e.g., change the sky with a mountain
"""

# NOTE(review): this Markdown component is created outside any Blocks/Interface
# context, so it is never rendered — kept for behavioral parity; confirm intent.
gr.Markdown(help_text)

# Use the unified top-level gr.Image component throughout: the original mixed
# the deprecated gr.inputs/gr.outputs namespaces with modern components
# (gr.Textbox) in the same Interface.
inputs = [gr.Image(type='pil'), gr.Textbox(label="instruction")]
gr.Interface(
    fn=inference,
    inputs=inputs,
    outputs=[
        gr.Image(
            type="pil",
            label="edit result"),
    ],
    examples=[
        ["./images/blue_white_bird.jpg", "change the color of bird's feathers from blue to red."],
        ["./images/house.jpg", "change the house to a modern one."],
        ["./images/apples.jpg", "change green apple to a red apple"],
        ["./images/Furniture_Gateway_02.jpg", "make the sofa to one with leather"],
        ["./images/girl_and_two_boys.png", "remove the boy with blue backbag"],
        ["./images/dog.png", "remove the chair"],
        ["./images/horse.png", "change the sky to mountain"],
        ["./images/Magritte_TheSonOfMan.jpg", "remove the green apple"]
    ],
    title=title,
    description=description,
    allow_flagging='never',
    cache_examples=True,
).launch()