
LAOS-Y committed
Commit: b8477ee
Parent: 9aa2be5
Files changed (1)
edit_app.py: +66 -86
edit_app.py CHANGED
@@ -1,8 +1,9 @@
 from __future__ import annotations
 
 import math
-import random
 from glob import glob
+from functools import partial
+import random
 
 import gradio as gr
 import torch
@@ -11,7 +12,58 @@ from datasets import load_dataset
 from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
 
 
-help_text = """
+def generate(
+    input_image: Image.Image,
+    instruction: str,
+    steps: int,
+    randomize_seed: bool,
+    seed: int,
+    randomize_cfg: bool,
+    text_cfg_scale: float,
+    image_cfg_scale: float,
+    pipe: StableDiffusionInstructPix2PixPipeline
+):
+    seed = random.randint(0, 100000) if randomize_seed else seed
+    text_cfg_scale = round(random.uniform(6.0, 9.0), ndigits=2) if randomize_cfg else text_cfg_scale
+    image_cfg_scale = round(random.uniform(1.2, 1.8), ndigits=2) if randomize_cfg else image_cfg_scale
+
+    width, height = input_image.size
+    factor = 512 / max(width, height)
+    factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
+    width = int((width * factor) // 64) * 64
+    height = int((height * factor) // 64) * 64
+    input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
+
+    if instruction == "":
+        return [seed, text_cfg_scale, image_cfg_scale, input_image]
+
+    generator = torch.manual_seed(seed)
+    edited_image = pipe(
+        instruction, image=input_image,
+        guidance_scale=text_cfg_scale, image_guidance_scale=image_cfg_scale,
+        num_inference_steps=steps, generator=generator,
+    ).images[0]
+    return [seed, text_cfg_scale, image_cfg_scale, edited_image]
+
+
+def show_image(image_name, image_options):
+    if image_name is None:
+        return
+
+    return image_options[image_name]
+
+
+def reset():
+    return [0, "Randomize Seed", 1371, "Fix CFG", 7.5, 1.5, None, None, None, ""]
+
+
+def sample(dataset):
+    sample_id = random.choice(list(range(len(dataset["train"]))))
+    sample = dataset["train"][sample_id]
+    return [sample["input_image"], sample["output_image"], sample["edit"], sample["inverse_edit"]]
+
+
+HELP_TEXT = """
 If you're not getting what you want, there may be a few reasons:
 1. Is the image not changing enough? Your Image CFG weight may be too high. This value dictates how similar the output should be to the input. It's possible your edit requires larger changes from the original image, and your Image CFG weight isn't allowing that. Alternatively, your Text CFG weight may be too low. This value dictates how much to listen to the text instruction. The default Image CFG of 1.5 and Text CFG of 7.5 are a good starting point, but aren't necessarily optimal for each edit. Try:
 * Decreasing the Image CFG weight, or
@@ -27,29 +79,8 @@ If you're not getting what you want, there may be a few reasons:
 """
 
 
-example_instructions = [
-    "Make it a picasso painting",
-    "as if it were by modigliani",
-    "convert to a bronze statue",
-    "Turn it into an anime.",
-    "have it look like a graphic novel",
-    "make him gain weight",
-    "what would he look like bald?",
-    "Have him smile",
-    "Put him in a cocktail party.",
-    "move him at the beach.",
-    "add dramatic lighting",
-    "Convert to black and white",
-    "What if it were snowing?",
-    "Give him a leather jacket",
-    "Turn him into a cyborg!",
-    "make him wear a beanie",
-]
-
-# model_id = "timbrooks/instruct-pix2pix"
-model_id = "MudeHui/ip2p-warp-gpt4v"
-
 def main():
+    model_id = "MudeHui/ip2p-warp-gpt4v"
     if torch.cuda.is_available():
         pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, safety_checker=None)
         pipe = pipe.to('cuda')
@@ -57,59 +88,8 @@ def main():
         pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float, safety_checker=None)
     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
 
-    def generate(
-        input_image: Image.Image,
-        instruction: str,
-        steps: int,
-        randomize_seed: bool,
-        seed: int,
-        randomize_cfg: bool,
-        text_cfg_scale: float,
-        image_cfg_scale: float,
-    ):
-        seed = random.randint(0, 100000) if randomize_seed else seed
-        text_cfg_scale = round(random.uniform(6.0, 9.0), ndigits=2) if randomize_cfg else text_cfg_scale
-        image_cfg_scale = round(random.uniform(1.2, 1.8), ndigits=2) if randomize_cfg else image_cfg_scale
-
-        width, height = input_image.size
-        factor = 512 / max(width, height)
-        factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
-        width = int((width * factor) // 64) * 64
-        height = int((height * factor) // 64) * 64
-        input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
-
-        if instruction == "":
-            return [input_image, seed]
-
-        generator = torch.manual_seed(seed)
-        edited_image = pipe(
-            instruction, image=input_image,
-            guidance_scale=text_cfg_scale, image_guidance_scale=image_cfg_scale,
-            num_inference_steps=steps, generator=generator,
-        ).images[0]
-        return [seed, text_cfg_scale, image_cfg_scale, edited_image]
-
-    def reset():
-        return [0, "Randomize Seed", 1371, "Fix CFG", 7.5, 1.5, None]
-
     image_options = {path.split("/")[-1].split(".")[0]: path for path in sorted(glob("imgs/*png"))}
 
-    def show_image(image_name):
-        # Retrieve the image file path from the dictionary based on the selected name
-        return image_options[image_name]
-
-    dataset = load_dataset("UCSC-VLAA/HQ-Edit-data-demo")
-
-    def sample():
-        sample_id = random.choice(list(range(len(dataset["train"]))))
-        sample = dataset["train"][sample_id]
-        return [sample["input_image"], sample["output_image"], sample["edit"], sample["inverse_edit"]]
-
-    def show_large_image(image_info):
-        # Returns the PIL image and caption for larger display
-        # return image_info['image'], image_info['caption']
-        return image_info
-
     with gr.Blocks() as demo:
         gr.HTML("""<h1 style="font-weight: 900; margin-bottom: 7px;">
         HQ-Edit: A High-Quality and High-Coverage Dataset for General Image Editing
@@ -133,8 +113,6 @@ def main():
         with gr.Row():
             input_image = gr.Image(label="Input Image", type="pil", interactive=True, height=512, width=512)
             edited_image = gr.Image(label=f"Edited Image", type="pil", interactive=False, height=512, width=512)
-            # input_image.style(height=512, width=512)
-            # edited_image.style(height=512, width=512)
 
         with gr.Row():
             steps = gr.Number(value=20, precision=0, label="Steps", interactive=True)
@@ -156,26 +134,23 @@ def main():
             text_cfg_scale = gr.Number(value=7.0, label=f"Text CFG", interactive=True)
             image_cfg_scale = gr.Number(value=1.5, label=f"Image CFG", interactive=True)
 
-        gr.Markdown(help_text)
+        gr.Markdown(HELP_TEXT)
 
         with gr.Row():
             gr.Markdown("## Dataset Preview")
             sample_button = gr.Button("See Another Sample")
 
         with gr.Row():
-            # Set up the Gallery component with a specific number of columns
-            # gallery = gr.Gallery(value=image_data, label="Image Gallery", type="pil", columns=2)
-            # Display for larger image
             input_image_preview = gr.Image(label="Input Image", type="pil", height=512, width=512)
             output_image_preview = gr.Image(label="Output Image", type="pil", height=512, width=512)
 
         edit_text = gr.Textbox(label="Edit Instruction")
         inv_edit_text = gr.Textbox(label="Inverse Edit Instruction")
 
-        dropdown.change(show_image, inputs=dropdown, outputs=input_image)
+        generate_func = partial(generate, pipe=pipe)
 
         generate_button.click(
-            fn=generate,
+            fn=generate_func,
             inputs=[
                 input_image,
                 instruction,
@@ -191,15 +166,20 @@ def main():
         reset_button.click(
             fn=reset,
             inputs=[],
-            outputs=[steps, randomize_seed, seed, randomize_cfg, text_cfg_scale, image_cfg_scale, edited_image],
+            outputs=[steps, randomize_seed, seed, randomize_cfg, text_cfg_scale, image_cfg_scale, input_image, edited_image, dropdown, instruction],
         )
 
+        show_image_func = partial(show_image, image_options=image_options)
+        dropdown.change(show_image_func, inputs=dropdown, outputs=input_image)
+
+        dataset = load_dataset("UCSC-VLAA/HQ-Edit-data-demo")
+        sample_func = partial(sample, dataset=dataset)
         sample_button.click(
-            fn=sample,
+            fn=sample_func,
             inputs=[],
             outputs=[input_image_preview, output_image_preview, edit_text, inv_edit_text]
         )
-
+
     demo.queue()
     demo.launch(share=True, max_threads=1)
 
 
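Note on the pattern: the main change in this commit is structural. The event handlers (generate, show_image, reset, sample) move from closures nested inside main() to module level, and their non-widget dependencies (pipe, image_options, dataset) are bound with functools.partial before being registered as Gradio callbacks, so Gradio still only supplies the widget values. Below is a minimal, self-contained sketch of that binding pattern; it is an illustration, not the Space's code, and greet, greeting, and the widget names are hypothetical.

from functools import partial

import gradio as gr


def greet(name: str, greeting: str) -> str:
    # `greeting` is a fixed dependency rather than a widget value; it is
    # pre-bound with functools.partial so Gradio only has to pass `name`.
    return f"{greeting}, {name}!"


def main():
    greet_func = partial(greet, greeting="Hello")  # bind the dependency once

    with gr.Blocks() as demo:
        name_box = gr.Textbox(label="Name")
        out_box = gr.Textbox(label="Output")
        button = gr.Button("Greet")
        # Gradio supplies only the declared inputs; `greeting` is already bound.
        button.click(fn=greet_func, inputs=[name_box], outputs=[out_box])

    demo.queue()
    demo.launch()


if __name__ == "__main__":
    main()

The same idea appears in the diff as generate_func = partial(generate, pipe=pipe), show_image_func = partial(show_image, image_options=image_options), and sample_func = partial(sample, dataset=dataset).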
 
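For reference, the resizing step in generate() (carried over unchanged by this commit) scales the input so its long side targets 512 and then snaps both dimensions down to multiples of 64, which Stable Diffusion pipelines generally expect. A standalone sketch of just that computation follows; resize_for_sd is a hypothetical name used only for this illustration.

import math

from PIL import Image, ImageOps


def resize_for_sd(image: Image.Image, target: int = 512) -> Image.Image:
    # Mirrors the preprocessing in generate(): scale toward `target` on the
    # long side, then floor both dimensions to multiples of 64.
    width, height = image.size
    factor = target / max(width, height)
    factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
    width = int((width * factor) // 64) * 64
    height = int((height * factor) // 64) * 64
    return ImageOps.fit(image, (width, height), method=Image.Resampling.LANCZOS)


if __name__ == "__main__":
    # Worked example: a 1024x768 input comes out as 512x384 (both multiples of 64).
    print(resize_for_sd(Image.new("RGB", (1024, 768))).size)  # (512, 384)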