Spaces: Running on Zero

refract

edit_app.py  (+66 -86)  CHANGED
--- a/edit_app.py
+++ b/edit_app.py
@@ -1,8 +1,9 @@
 from __future__ import annotations
 
 import math
-import random
 from glob import glob
+from functools import partial
+import random
 
 import gradio as gr
 import torch
@@ -11,7 +12,58 @@ from datasets import load_dataset
 from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler
 
 
-help_text = """
+def generate(
+    input_image: Image.Image,
+    instruction: str,
+    steps: int,
+    randomize_seed: bool,
+    seed: int,
+    randomize_cfg: bool,
+    text_cfg_scale: float,
+    image_cfg_scale: float,
+    pipe: StableDiffusionInstructPix2PixPipeline
+):
+    seed = random.randint(0, 100000) if randomize_seed else seed
+    text_cfg_scale = round(random.uniform(6.0, 9.0), ndigits=2) if randomize_cfg else text_cfg_scale
+    image_cfg_scale = round(random.uniform(1.2, 1.8), ndigits=2) if randomize_cfg else image_cfg_scale
+
+    width, height = input_image.size
+    factor = 512 / max(width, height)
+    factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
+    width = int((width * factor) // 64) * 64
+    height = int((height * factor) // 64) * 64
+    input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
+
+    if instruction == "":
+        return [seed, text_cfg_scale, image_cfg_scale, input_image]
+
+    generator = torch.manual_seed(seed)
+    edited_image = pipe(
+        instruction, image=input_image,
+        guidance_scale=text_cfg_scale, image_guidance_scale=image_cfg_scale,
+        num_inference_steps=steps, generator=generator,
+    ).images[0]
+    return [seed, text_cfg_scale, image_cfg_scale, edited_image]
+
+
+def show_image(image_name, image_options):
+    if image_name is None:
+        return
+
+    return image_options[image_name]
+
+
+def reset():
+    return [0, "Randomize Seed", 1371, "Fix CFG", 7.5, 1.5, None, None, None, ""]
+
+
+def sample(dataset):
+    sample_id = random.choice(list(range(len(dataset["train"]))))
+    sample = dataset["train"][sample_id]
+    return [sample["input_image"], sample["output_image"], sample["edit"], sample["inverse_edit"]]
+
+
+HELP_TEXT = """
 If you're not getting what you want, there may be a few reasons:
 1. Is the image not changing enough? Your Image CFG weight may be too high. This value dictates how similar the output should be to the input. It's possible your edit requires larger changes from the original image, and your Image CFG weight isn't allowing that. Alternatively, your Text CFG weight may be too low. This value dictates how much to listen to the text instruction. The default Image CFG of 1.5 and Text CFG of 7.5 are a good starting point, but aren't necessarily optimal for each edit. Try:
     * Decreasing the Image CFG weight, or
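A quick worked pass through the resize arithmetic in the new module-level generate(), using an assumed 1000x750 input (the numbers are illustrative, not from the commit). The code scales the long side toward 512 and snaps both sides to multiples of 64 before handing the image to the pipeline:

import math

width, height = 1000, 750                                                        # assumed example size
factor = 512 / max(width, height)                                                # 0.512: shrink the long side toward 512
factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)   # still 0.512: round the short side up to a multiple of 64
width = int((width * factor) // 64) * 64                                         # 512
height = int((height * factor) // 64) * 64                                       # 384
# ImageOps.fit(input_image, (512, 384), ...) then crops and resizes to exactly those dimensions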
@@ -27,29 +79,8 @@ If you're not getting what you want, there may be a few reasons:
 """
 
 
-example_instructions = [
-    "Make it a picasso painting",
-    "as if it were by modigliani",
-    "convert to a bronze statue",
-    "Turn it into an anime.",
-    "have it look like a graphic novel",
-    "make him gain weight",
-    "what would he look like bald?",
-    "Have him smile",
-    "Put him in a cocktail party.",
-    "move him at the beach.",
-    "add dramatic lighting",
-    "Convert to black and white",
-    "What if it were snowing?",
-    "Give him a leather jacket",
-    "Turn him into a cyborg!",
-    "make him wear a beanie",
-]
-
-# model_id = "timbrooks/instruct-pix2pix"
-model_id = "MudeHui/ip2p-warp-gpt4v"
-
 def main():
+    model_id = "MudeHui/ip2p-warp-gpt4v"
     if torch.cuda.is_available():
         pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float16, safety_checker=None)
         pipe = pipe.to('cuda')
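For orientation, the two knobs described in HELP_TEXT map onto the guidance arguments that generate() passes to the pipeline: Text CFG is guidance_scale and Image CFG is image_guidance_scale. Below is a self-contained sketch of a single edit with the weights nudged the way the help text suggests when the image is not changing enough; the input path, prompt, and exact values are assumptions for illustration, not part of this commit:

import torch
from PIL import Image
from diffusers import StableDiffusionInstructPix2PixPipeline, EulerAncestralDiscreteScheduler

pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
    "MudeHui/ip2p-warp-gpt4v",
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    safety_checker=None,
)
if torch.cuda.is_available():
    pipe = pipe.to("cuda")
pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)

input_image = Image.open("imgs/example.png").convert("RGB").resize((512, 512))  # assumed local file
edited = pipe(
    "Turn it into an anime.",          # one of the removed example_instructions
    image=input_image,
    guidance_scale=8.5,                # Text CFG: raise it to follow the instruction more strongly
    image_guidance_scale=1.2,          # Image CFG: lower it to allow bigger changes to the input
    num_inference_steps=20,
).images[0]
edited.save("edited.png")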
@@ -57,59 +88,8 @@ def main():
         pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(model_id, torch_dtype=torch.float, safety_checker=None)
     pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(pipe.scheduler.config)
 
-    def generate(
-        input_image: Image.Image,
-        instruction: str,
-        steps: int,
-        randomize_seed: bool,
-        seed: int,
-        randomize_cfg: bool,
-        text_cfg_scale: float,
-        image_cfg_scale: float,
-    ):
-        seed = random.randint(0, 100000) if randomize_seed else seed
-        text_cfg_scale = round(random.uniform(6.0, 9.0), ndigits=2) if randomize_cfg else text_cfg_scale
-        image_cfg_scale = round(random.uniform(1.2, 1.8), ndigits=2) if randomize_cfg else image_cfg_scale
-
-        width, height = input_image.size
-        factor = 512 / max(width, height)
-        factor = math.ceil(min(width, height) * factor / 64) * 64 / min(width, height)
-        width = int((width * factor) // 64) * 64
-        height = int((height * factor) // 64) * 64
-        input_image = ImageOps.fit(input_image, (width, height), method=Image.Resampling.LANCZOS)
-
-        if instruction == "":
-            return [input_image, seed]
-
-        generator = torch.manual_seed(seed)
-        edited_image = pipe(
-            instruction, image=input_image,
-            guidance_scale=text_cfg_scale, image_guidance_scale=image_cfg_scale,
-            num_inference_steps=steps, generator=generator,
-        ).images[0]
-        return [seed, text_cfg_scale, image_cfg_scale, edited_image]
-
-    def reset():
-        return [0, "Randomize Seed", 1371, "Fix CFG", 7.5, 1.5, None]
-
     image_options = {path.split("/")[-1].split(".")[0]: path for path in sorted(glob("imgs/*png"))}
 
-    def show_image(image_name):
-        # Retrieve the image file path from the dictionary based on the selected name
-        return image_options[image_name]
-
-    dataset = load_dataset("UCSC-VLAA/HQ-Edit-data-demo")
-
-    def sample():
-        sample_id = random.choice(list(range(len(dataset["train"]))))
-        sample = dataset["train"][sample_id]
-        return [sample["input_image"], sample["output_image"], sample["edit"], sample["inverse_edit"]]
-
-    def show_large_image(image_info):
-        # Returns the PIL image and caption for larger display
-        # return image_info['image'], image_info['caption']
-        return image_info
-
     with gr.Blocks() as demo:
         gr.HTML("""<h1 style="font-weight: 900; margin-bottom: 7px;">
     HQ-Edit: A High-Quality and High-Coverage Dataset for General Image Editing
@@ -133,8 +113,6 @@ def main():
         with gr.Row():
             input_image = gr.Image(label="Input Image", type="pil", interactive=True, height=512, width=512)
             edited_image = gr.Image(label=f"Edited Image", type="pil", interactive=False, height=512, width=512)
-            # input_image.style(height=512, width=512)
-            # edited_image.style(height=512, width=512)
 
         with gr.Row():
             steps = gr.Number(value=20, precision=0, label="Steps", interactive=True)
@@ -156,26 +134,23 @@ def main():
             text_cfg_scale = gr.Number(value=7.0, label=f"Text CFG", interactive=True)
             image_cfg_scale = gr.Number(value=1.5, label=f"Image CFG", interactive=True)
 
-        gr.Markdown(help_text)
+        gr.Markdown(HELP_TEXT)
 
         with gr.Row():
             gr.Markdown("## Dataset Preview")
             sample_button = gr.Button("See Another Sample")
 
         with gr.Row():
-            # Set up the Gallery component with a specific number of columns
-            # gallery = gr.Gallery(value=image_data, label="Image Gallery", type="pil", columns=2)
-            # Display for larger image
             input_image_preview = gr.Image(label="Input Image", type="pil", height=512, width=512)
             output_image_preview = gr.Image(label="Output Image", type="pil", height=512, width=512)
 
             edit_text = gr.Textbox(label="Edit Instruction")
             inv_edit_text = gr.Textbox(label="Inverse Edit Instruction")
 
-
+        generate_func = partial(generate, pipe=pipe)
 
         generate_button.click(
-            fn=generate,
+            fn=generate_func,
             inputs=[
                 input_image,
                 instruction,
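The added generate_func line is the heart of the refactor: generate() now lives at module level and takes the pipeline explicitly, and functools.partial pre-binds it before the click handler is registered, so the callback signature Gradio sees contains only UI inputs. A minimal runnable sketch of the same pattern; greet, greeting, and the components here are toy stand-ins, not part of this Space:

from functools import partial

import gradio as gr

def greet(name, greeting):
    # stand-in for generate(...): the last argument is not a UI component
    return f"{greeting}, {name}!"

with gr.Blocks() as demo:
    name = gr.Textbox(label="Name")
    out = gr.Textbox(label="Output")
    btn = gr.Button("Go")
    # pre-bind the non-UI argument, mirroring generate_func = partial(generate, pipe=pipe)
    btn.click(fn=partial(greet, greeting="Hello"), inputs=[name], outputs=[out])

demo.launch()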
@@ -191,15 +166,20 @@ def main():
         reset_button.click(
             fn=reset,
             inputs=[],
-            outputs=[steps, randomize_seed, seed, randomize_cfg, text_cfg_scale, image_cfg_scale, edited_image],
+            outputs=[steps, randomize_seed, seed, randomize_cfg, text_cfg_scale, image_cfg_scale, input_image, edited_image, dropdown, instruction],
         )
 
+        show_image_func = partial(show_image, image_options=image_options)
+        dropdown.change(show_image_func, inputs=dropdown, outputs=input_image)
+
+        dataset = load_dataset("UCSC-VLAA/HQ-Edit-data-demo")
+        sample_func = partial(sample, dataset=dataset)
         sample_button.click(
-            fn=sample,
+            fn=sample_func,
             inputs=[],
             outputs=[input_image_preview, output_image_preview, edit_text, inv_edit_text]
         )
-
+
     demo.queue()
     demo.launch(share=True, max_threads=1)
 
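The dataset-preview wiring added above pulls one random pair from UCSC-VLAA/HQ-Edit-data-demo per click, again with partial() binding the loaded dataset. A hedged standalone sketch of what sample() hands to the preview widgets; the field names come from the code above, and saving to disk assumes the image columns decode to PIL images (consistent with gr.Image(type="pil") in the UI):

import random

from datasets import load_dataset

dataset = load_dataset("UCSC-VLAA/HQ-Edit-data-demo")
row = dataset["train"][random.randrange(len(dataset["train"]))]

print(row["edit"])          # forward instruction shown in the "Edit Instruction" box
print(row["inverse_edit"])  # instruction that would undo the edit
row["input_image"].save("sample_input.png")
row["output_image"].save("sample_output.png")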