merge from hf

Files changed:
- ImageState.py +50 -22
- README.md +12 -0
- animation.py +3 -2
- app.py +121 -96
- app_backend.py → backend.py +5 -10
- configs.py +15 -0
- loaders.py +1 -0
- masking.py +1 -1
- presets.py +16 -0
ImageState.py
CHANGED
@@ -1,9 +1,11 @@
 # from align import align_from_path
+import gc
+import imageio
+import glob
+import uuid
 from animation import clear_img_dir
-from app_backend import ImagePromptOptimizer, log
-from functools import cache
+from backend import ImagePromptOptimizer, log
 import importlib
-
 import gradio as gr
 import matplotlib.pyplot as plt
 import torch
@@ -15,13 +17,13 @@ from torchvision.transforms.functional import resize
 from tqdm import tqdm
 from transformers import CLIPModel, CLIPProcessor
 import lpips
-from app_backend import get_resized_tensor
+from backend import get_resized_tensor
 from edit import blend_paths
 from img_processing import *
 from img_processing import custom_to_pil
 from loaders import load_default
-
 num = 0
+
 class PromptTransformHistory():
     def __init__(self, iterations) -> None:
         self.iterations = iterations
@@ -29,6 +31,7 @@
 
 class ImageState:
     def __init__(self, vqgan, prompt_optimizer: ImagePromptOptimizer) -> None:
+        # global vqgan
         self.vqgan = vqgan
         self.device = vqgan.device
         self.blend_latent = None
@@ -38,6 +41,8 @@
         self.transform_history = []
         self.attn_mask = None
         self.prompt_optim = prompt_optimizer
+        self.state_id = None
+        print(self.state_id)
         self._load_vectors()
         self.init_transforms()
     def _load_vectors(self):
@@ -45,6 +50,22 @@
         self.red_blue_vector = torch.load("./latent_vectors/2blue_eyes.pt", map_location=self.device)
         self.green_purple_vector = torch.load("./latent_vectors/nose_vector.pt", map_location=self.device)
         self.asian_vector = torch.load("./latent_vectors/asian10.pt", map_location=self.device)
+    def create_gif(self, total_duration, extend_frames, gif_name="face_edit.gif"):
+        images = []
+        folder = self.state_id
+        paths = glob.glob(folder + "/*")
+        frame_duration = total_duration / len(paths)
+        print(len(paths), "frame dur", frame_duration)
+        durations = [frame_duration] * len(paths)
+        if extend_frames:
+            durations[0] = 1.5
+            durations[-1] = 3
+        for file_name in os.listdir(folder):
+            if file_name.endswith('.png'):
+                file_path = os.path.join(folder, file_name)
+                images.append(imageio.imread(file_path))
+        imageio.mimsave(gif_name, images, duration=durations)
+        return gif_name
     def init_transforms(self):
         self.blue_eyes = torch.zeros_like(self.lip_vector)
         self.lip_size = torch.zeros_like(self.lip_vector)
@@ -54,7 +75,7 @@
     def clear_transforms(self):
         global num
         self.init_transforms()
-        clear_img_dir()
+        clear_img_dir("./img_history")
         num = 0
         return self._render_all_transformations()
     def _apply_vector(self, src, vector):
@@ -63,7 +84,7 @@
     def _decode_latent_to_pil(self, latent):
         current_im = self.vqgan.decode(latent.to(self.device))[0]
         return custom_to_pil(current_im)
-    def
+    def _get_mask(self, img, mask=None):
         if img and "mask" in img and img["mask"] is not None:
             attn_mask = torchvision.transforms.ToTensor()(img["mask"])
             attn_mask = torch.ceil(attn_mask[0].to(self.device))
@@ -74,7 +95,7 @@
             attn_mask = mask
         return attn_mask
     def set_mask(self, img):
-        attn_mask = self.
+        attn_mask = self._get_mask(img)
         self.attn_mask = attn_mask
         # attn_mask = torch.ones_like(img, device=self.device)
         x = attn_mask.clone()
@@ -88,15 +109,21 @@
     @torch.no_grad()
     def _render_all_transformations(self, return_twice=True):
         global num
+        # global vqgan
+        if self.state_id is None:
+            self.state_id = "./img_history/" + str(uuid.uuid4())
+        print("render all", self.state_id)
         current_vector_transforms = (self.blue_eyes, self.lip_size, self.hair_gp, self.asian_transform, sum(self.current_prompt_transforms))
         new_latent = self.blend_latent + sum(current_vector_transforms)
         if self.quant:
             new_latent, _, _ = self.vqgan.quantize(new_latent.to(self.device))
         image = self._decode_latent_to_pil(new_latent)
-        img_dir =
+        img_dir = self.state_id
+        if not os.path.exists("img_history"):
+            os.mkdir("./img_history")
         if not os.path.exists(img_dir):
             os.mkdir(img_dir)
-        image.save(f"
+        image.save(f"{img_dir}/img_{num:06}.png")
         num += 1
         return (image, image) if return_twice else image
     def apply_gp_vector(self, weight):
@@ -112,17 +139,21 @@
         print(f"val = {val}")
         self.quant = val
         return self._render_all_transformations()
-    def
+    def apply_asian_vector(self, weight):
         self.asian_transform = weight * self.asian_vector
         return self._render_all_transformations()
     def update_images(self, path1, path2, blend_weight):
         if path1 is None and path2 is None:
+            print("no paths")
             return None
+        if path1 == path2:
+            print("paths are the same")
+            print(path1)
         if path1 is None: path1 = path2
         if path2 is None: path2 = path1
         self.path1, self.path2 = path1, path2
-
-
+        if self.state_id:
+            clear_img_dir(self.state_id)
         return self.blend(blend_weight)
     @torch.no_grad()
     def blend(self, weight):
@@ -137,16 +168,11 @@
         prompt_transform = self.transform_history[-1]
         latent_index = int(index / 100 * (prompt_transform.iterations - 1))
         print(latent_index)
-        self.current_prompt_transforms[-1] = prompt_transform.transforms[latent_index]
-        # print(self.current_prompt_transform)
-        # print(self.current_prompt_transforms.mean())
+        self.current_prompt_transforms[-1] = prompt_transform.transforms[latent_index].to(self.device)
         return self._render_all_transformations()
-    def rescale_mask(self, mask):
-        rep = mask.clone()
-        rep[mask < 0.03] = -1000000
-        rep[mask >= 0.03] = 1
-        return rep
     def apply_prompts(self, positive_prompts, negative_prompts, lr, iterations, lpips_weight, reconstruction_steps):
+        if self.state_id is None:
+            self.state_id = "./img_history/" + str(uuid.uuid4())
         transform_log = PromptTransformHistory(iterations + reconstruction_steps)
         transform_log.transforms.append(torch.zeros_like(self.blend_latent, requires_grad=False))
         self.current_prompt_transforms.append(torch.zeros_like(self.blend_latent, requires_grad=False))
@@ -165,7 +191,7 @@
         for i, transform in enumerate(self.prompt_optim.optimize(self.blend_latent,
                                                 positive_prompts,
                                                 negative_prompts)):
-            transform_log.transforms.append(transform.
+            transform_log.transforms.append(transform.detach().cpu())
             self.current_prompt_transforms[-1] = transform
             with torch.no_grad():
                 image = self._render_all_transformations(return_twice=False)
@@ -176,6 +202,8 @@
         wandb.finish()
         self.attn_mask = None
         self.transform_history.append(transform_log)
+        gc.collect()
+        torch.cuda.empty_cache()
         # transform = self.prompt_optim.optimize(self.blend_latent,
         #                                 positive_prompts,
         #                                 negative_prompts)
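Taken together, these ImageState.py changes give each session its own frame directory: state_id starts as None and is lazily set to "./img_history/<uuid4>" on the first render, every render then saves a zero-padded PNG there, and create_gif stitches the folder into an animation. Below is a minimal standalone sketch of that flow; the directory layout and GIF name come from the diff, while new_session_dir, make_gif, and the sorted() call are illustrative (the committed create_gif iterates os.listdir unsorted, so its frame order is not guaranteed).

# Sketch only: mirrors the per-session save/stitch flow introduced above.
import glob
import os
import uuid

import imageio

def new_session_dir(root="./img_history"):
    # ImageState lazily creates "./img_history/<uuid4>" on first render
    path = os.path.join(root, str(uuid.uuid4()))
    os.makedirs(path, exist_ok=True)
    return path

def make_gif(folder, total_duration, extend_frames=True, gif_name="face_edit.gif"):
    # Zero-padded names (img_000000.png, ...) sort lexicographically,
    # so sorting keeps the frames in render order.
    paths = sorted(glob.glob(os.path.join(folder, "*.png")))
    durations = [total_duration / len(paths)] * len(paths)
    if extend_frames:  # hold the first and last frames longer
        durations[0] = 1.5
        durations[-1] = 3
    frames = [imageio.imread(p) for p in paths]
    imageio.mimsave(gif_name, frames, duration=durations)
    return gif_name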
README.md
ADDED
@@ -0,0 +1,12 @@
+---
+title: Face Editor
+emoji: 🪞
+colorFrom: yellow
+colorTo: indigo
+sdk: gradio
+sdk_version: 3.14.0
+app_file: app.py
+pinned: false
+---
+
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
animation.py
CHANGED
@@ -2,8 +2,9 @@ import imageio
 import glob
 import os
 
-def clear_img_dir():
-
+def clear_img_dir(img_dir):
+    if not os.path.exists("img_history"):
+        os.mkdir("img_history")
     if not os.path.exists(img_dir):
         os.mkdir(img_dir)
     for filename in glob.glob(img_dir+"/*"):
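The hunk ends before the loop body, so the actual file-removal step is not shown. A hedged sketch of the full helper as it presumably reads after this commit; the os.remove line is an assumption, everything else is from the diff:

import glob
import os

def clear_img_dir(img_dir):
    if not os.path.exists("img_history"):
        os.mkdir("img_history")
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
    for filename in glob.glob(img_dir + "/*"):
        os.remove(filename)  # assumed: delete each saved frame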
app.py
CHANGED
@@ -3,40 +3,106 @@ import os
 import sys
 
 import wandb
+import torch
 
 from presets import set_major_global, set_major_local, set_small_local
 
 sys.path.append("taming-transformers")
-import functools
 
 import gradio as gr
 from transformers import CLIPModel, CLIPProcessor
+from lpips import LPIPS
 
 import edit
-
-# importlib.reload(edit)
-from app_backend import ImagePromptOptimizer, ProcessorGradientFlow
+from backend import ImagePromptOptimizer, ProcessorGradientFlow
 from ImageState import ImageState
 from loaders import load_default
-from animation import create_gif
+# from animation import create_gif
 from prompts import get_random_prompts
 
-device = "cuda"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+global vqgan
 vqgan = load_default(device)
 vqgan.eval()
 processor = ProcessorGradientFlow(device=device)
-clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
-
-
-
-def set_img_from_example(img):
+# clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
+lpips_fn = LPIPS(net='vgg').to(device)
+clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
+promptoptim = ImagePromptOptimizer(vqgan, clip, processor, lpips_fn=lpips_fn, quantize=True)
+def set_img_from_example(state, img):
     return state.update_images(img, img, 0)
 def get_cleared_mask():
     return gr.Image.update(value=None)
 # mask.clear()
+
+class StateWrapper:
+    def create_gif(state, *args, **kwargs):
+        return state, state[0].create_gif(*args, **kwargs)
+    def apply_asian_vector(state, *args, **kwargs):
+        return state, *state[0].apply_asian_vector(*args, **kwargs)
+    def apply_gp_vector(state, *args, **kwargs):
+        return state, *state[0].apply_gp_vector(*args, **kwargs)
+    def apply_lip_vector(state, *args, **kwargs):
+        return state, *state[0].apply_lip_vector(*args, **kwargs)
+    def apply_prompts(state, *args, **kwargs):
+        print(state[1])
+        for image in state[0].apply_prompts(*args, **kwargs):
+            yield state, *image
+    def apply_rb_vector(state, *args, **kwargs):
+        return state, *state[0].apply_rb_vector(*args, **kwargs)
+    def blend(state, *args, **kwargs):
+        return state, *state[0].blend(*args, **kwargs)
+    def clear_transforms(state, *args, **kwargs):
+        return state, *state[0].clear_transforms(*args, **kwargs)
+    def init_transforms(state, *args, **kwargs):
+        return state, *state[0].init_transforms(*args, **kwargs)
+    def prompt_optim(state, *args, **kwargs):
+        return state, *state[0].prompt_optim(*args, **kwargs)
+    def rescale_mask(state, *args, **kwargs):
+        return state, *state[0].rescale_mask(*args, **kwargs)
+    def rewind(state, *args, **kwargs):
+        return state, *state[0].rewind(*args, **kwargs)
+    def set_mask(state, *args, **kwargs):
+        return state, state[0].set_mask(*args, **kwargs)
+    def update_images(state, *args, **kwargs):
+        return state, *state[0].update_images(*args, **kwargs)
+    def update_requant(state, *args, **kwargs):
+        return state, *state[0].update_requant(*args, **kwargs)
 with gr.Blocks(css="styles.css") as demo:
+    # id = gr.State(str(uuid.uuid4()))
+    state = gr.State([ImageState(vqgan, promptoptim), str(uuid.uuid4())])
     with gr.Row():
         with gr.Column(scale=1):
+            with gr.Row():
+                with gr.Column():
+                    gr.Markdown(value="""## Image Upload
+                    For best results, crop the photos like in the example pictures""", show_label=False)
+                    with gr.Row():
+                        base_img = gr.Image(label="Base Image", type="filepath")
+                        blend_img = gr.Image(label="Image for face blending (optional)", type="filepath")
+                    with gr.Accordion(label="Add Mask", open=False):
+                        mask = gr.Image(tool="sketch", interactive=True)
+                        gr.Markdown(value="Note: You must clear the mask using the rewind button every time you want to change the mask (this is a gradio issue)")
+                        set_mask = gr.Button(value="Set mask")
+                        gr.Text(value="this image shows the mask passed to the model when you press set mask (debugging purposes)")
+                        testim = gr.Image()
+            with gr.Row():
+                gr.Examples(
+                    examples=glob.glob("test_pics/*"),
+                    inputs=base_img,
+                    outputs=blend_img,
+                    fn=set_img_from_example,
+                )
+        with gr.Column(scale=1):
+            out = gr.Image()
+            rewind = gr.Slider(value=100,
+                               label="Rewind back through a prompt transform: Use this to scroll through the iterations of your prompt transformation.",
+                               minimum=0,
+                               maximum=100)
+
+            apply_prompts = gr.Button(variant="primary", value="🎨 Apply Prompts", elem_id="apply")
+            clear = gr.Button(value="❌ Clear all transformations (irreversible)", elem_id="warning")
             blue_eyes = gr.Slider(
                 label="Blue Eyes",
                 minimum=-.8,
@@ -76,120 +142,79 @@ with gr.Blocks(css="styles.css") as demo:
                 maximum=2.,
                 step=0.07,
             )
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown(value="""## Image Upload
-                For best results, crop the photos like in the example pictures""", show_label=False)
-                with gr.Row():
-                    base_img = gr.Image(label="Base Image", type="filepath")
-                    blend_img = gr.Image(label="Image for face blending (optional)", type="filepath")
-            # gr.Markdown("## Image Examples")
-                with gr.Accordion(label="Add Mask", open=False):
-                    mask = gr.Image(tool="sketch", interactive=True)
-                    gr.Markdown(value="Note: You must clear the mask using the rewind button every time you want to change the mask (this is a gradio bug)")
-                    set_mask = gr.Button(value="Set mask")
-                    gr.Text(value="this image shows the mask passed to the model when you press set mask (debugging purposes)")
-                    testim = gr.Image()
-                clear_mask = gr.Button(value="Clear mask")
-                clear_mask.click(get_cleared_mask, outputs=mask)
-            with gr.Row():
-                gr.Examples(
-                    examples=glob.glob("test_pics/*"),
-                    inputs=base_img,
-                    outputs=blend_img,
-                    fn=set_img_from_example,
-                    # cache_examples=True,
-                )
-        with gr.Column(scale=1):
-            out = gr.Image()
-            rewind = gr.Slider(value=100,
-                label="Rewind back through a prompt transform: Use this to scroll through the iterations of your prompt transformation.",
-                minimum=0,
-                maximum=100)
-
-            apply_prompts = gr.Button(value="Apply Prompts", elem_id="apply")
-            clear = gr.Button(value="Clear all transformations (irreversible)", elem_id="warning")
-        with gr.Accordion(label="Save Animation", open=False):
+            with gr.Accordion(label="💾 Save Animation", open=False):
                 gr.Text(value="Creates an animation of all the steps in the editing process", show_label=False)
                 duration = gr.Number(value=10, label="Duration of the animation in seconds")
                 extend_frames = gr.Checkbox(value=True, label="Make first and last frame longer")
                 gif = gr.File(interactive=False)
                 create_animation = gr.Button(value="Create Animation")
-            create_animation.click(create_gif, inputs=[duration, extend_frames], outputs=gif)
+                create_animation.click(StateWrapper.create_gif, inputs=[state, duration, extend_frames], outputs=[state, gif])
 
         with gr.Column(scale=1):
-            gr.Markdown(value="""##
-            See readme for a prompting guide. Use the '|' symbol to separate prompts. Use the "Add mask" section to make local edits. Negative prompts are highly recommended""", show_label=False)
+            gr.Markdown(value="""## ✍️ Prompt Editing
+            See readme for a prompting guide. Use the '|' symbol to separate prompts. Use the "Add mask" section to make local edits (Remember to click Set Mask!). Negative prompts are highly recommended""", show_label=False)
             positive_prompts = gr.Textbox(label="Positive prompts",
-                value="
+                value="A picture of a handsome man | a picture of a masculine man",)
             negative_prompts = gr.Textbox(label="Negative prompts",
-                value="a picture of a
+                value="a picture of a woman | a picture of a feminine person")
             gen_prompts = gr.Button(value="🎲 Random prompts")
             gen_prompts.click(get_random_prompts, outputs=[positive_prompts, negative_prompts])
             with gr.Row():
                 with gr.Column():
-                    gr.Text(value="Prompt Editing Configuration", show_label=False)
                     with gr.Row():
-                        gr.Markdown(value="##
+                        gr.Markdown(value="## ⚙ Prompt Editing Config", show_label=False)
+            with gr.Accordion(label="Config Tutorial", open=False):
+                gr.Markdown(value="""
+                - If results are not changing enough, increase the learning rate or decrease the perceptual loss weight
+                - To make local edits, use the 'Add Mask' section
+                - If using a mask and the image is changing too much outside of the masked area, try increasing the perceptual loss weight or lowering the learning rate
+                - Use the rewind slider to scroll through the iterations of your prompt transformation, you can resume editing from any point in the history.
+                - I recommend starting prompts with 'a picture of a'
+                - To avoid shifts in gender, you can use 'a person' instead of 'a man' or 'a woman', especially in the negative prompts.
+                - The more 'out-of-domain' the prompts are, the more you need to increase the learning rate and decrease the perceptual loss weight. For example, trying to make a black person have platinum blond hair is more out-of-domain than the same transformation on a caucasian person.
+                - Example: Higher config values, like learning rate: 0.7, perceptual loss weight: 35 can be used to make major out-of-domain changes.
+                """)
             with gr.Row():
-                with gr.Column():
-
-                with gr.Column():
-                    major_local = gr.Button(value="Major Masked Changes (e.g. change hair color or nose size)").style(full_width=False)
-                with gr.Column():
-                    major_global = gr.Button(value="Major Global Changes (e.g. change race / gender").style(full_width=False)
+                # with gr.Column():
+                presets = gr.Dropdown(value="Select a preset", label="Preset Configs", choices=["Small Masked Changes (e.g. add lipstick)", "Major Masked Changes (e.g. change hair color or nose size)", "Major Global Changes (e.g. change race / gender"])
             iterations = gr.Slider(minimum=10,
-                maximum=
+                                   maximum=60,
                                    step=1,
                                    value=20,
                                    label="Iterations: How many steps the model will take to modify the image. Try starting small and seeing how the results turn out, you can always resume with afterwards",)
-            learning_rate = gr.Slider(minimum=
-                maximum=
-                value=1e-
+            learning_rate = gr.Slider(minimum=4e-3,
+                                      maximum=1,
+                                      value=1e-1,
                                       label="Learning Rate: How strong the change in each step will be (you should raise this for bigger changes (for example, changing hair color), and lower it for more minor changes. Raise if changes aren't strong enough")
-            with gr.Accordion(label="Advanced Prompt Editing Options", open=False):
             lpips_weight = gr.Slider(minimum=0,
                                      maximum=50,
                                      value=1,
-                label="Perceptual
+                                     label="Perceptual Loss weight (Keeps areas outside of the mask looking similar to the original. Increase if the rest of the image is changing too much while you're trying to make a localized edit")
             reconstruction_steps = gr.Slider(minimum=0,
                                              maximum=50,
-                value=
+                                             value=3,
                                              step=1,
-                label="Steps to run at the end of the optimization, optimizing only the masked perceptual loss. If the edit is changing the identity too much, this setting will run steps at the end that
+                                             label="Steps to run at the end of the optimization, optimizing only the masked perceptual loss. If the edit is changing the identity too much, this setting will run steps at the end that 'pull' the image back towards the original identity")
             # discriminator_steps = gr.Slider(minimum=0,
             #                                 maximum=50,
             #                                 step=1,
             #                                 value=0,
             #                                 label="Steps to run at the end, optimizing only the discriminator loss. This helps to reduce artefacts, but because the model is trained on CelebA, this will make your generations look more like generic white celebrities")
-    clear.click(
-    asian_weight.change(
-    lip_size.change(
-    # hair_green_purple.change(
-    blue_eyes.change(
-
-
-
-
-
-
-
-
-
-
-
-    apply_prompts.click(state.apply_prompts, inputs=[positive_prompts, negative_prompts, learning_rate, iterations, lpips_weight, reconstruction_steps], outputs=[out, mask])
-    rewind.change(state.rewind, inputs=[rewind], outputs=[out, mask])
-    set_mask.click(state.set_mask, inputs=mask, outputs=testim)
+    clear.click(StateWrapper.clear_transforms, inputs=[state], outputs=[state, out, mask])
+    asian_weight.change(StateWrapper.apply_asian_vector, inputs=[state, asian_weight], outputs=[state, out, mask])
+    lip_size.change(StateWrapper.apply_lip_vector, inputs=[state, lip_size], outputs=[state, out, mask])
+    # hair_green_purple.change(StateWrapper.apply_gp_vector, inputs=[state, hair_green_purple], outputs=[state, out, mask])
+    blue_eyes.change(StateWrapper.apply_rb_vector, inputs=[state, blue_eyes], outputs=[state, out, mask])
+    blend_weight.change(StateWrapper.blend, inputs=[state, blend_weight], outputs=[state, out, mask])
+    # requantize.change(StateWrapper.update_requant, inputs=[state, requantize], outputs=[state, out, mask])
+    base_img.change(StateWrapper.update_images, inputs=[state, base_img, blend_img, blend_weight], outputs=[state, out, mask])
+    blend_img.change(StateWrapper.update_images, inputs=[state, base_img, blend_img, blend_weight], outputs=[state, out, mask])
+    # small_local.click(set_small_local, outputs=[iterations, learning_rate, lpips_weight, reconstruction_steps])
+    # major_local.click(set_major_local, outputs=[iterations, learning_rate, lpips_weight, reconstruction_steps])
+    # major_global.click(set_major_global, outputs=[iterations, learning_rate, lpips_weight, reconstruction_steps])
+    apply_prompts.click(StateWrapper.apply_prompts, inputs=[state, positive_prompts, negative_prompts, learning_rate, iterations, lpips_weight, reconstruction_steps], outputs=[state, out, mask])
+    rewind.change(StateWrapper.rewind, inputs=[state, rewind], outputs=[state, out, mask])
+    set_mask.click(StateWrapper.set_mask, inputs=[state, mask], outputs=[state, testim])
+    presets.change(set_preset, inputs=[presets], outputs=[iterations, learning_rate, lpips_weight, reconstruction_steps])
 demo.queue()
-demo.launch(debug=True,
-# if __name__ == "__main__":
-#     import argparse
-#     parser = argparse.ArgumentParser()
-#     parser.add_argument('--debug', action='store_true', default=False, help='Enable debugging output')
-#     args = parser.parse_args()
-#     # if args.debug:
-#     #     state=None
-#     #     promptoptim=None
-#     # else:
-#     main()
+demo.launch(debug=True, enable_queue=True)
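The main structural change in app.py is session isolation: instead of one module-level ImageState shared by every visitor, the state now lives in a gr.State holding [ImageState, session-uuid], and every StateWrapper method takes that state as its first argument and returns it alongside the outputs. A minimal sketch of the same pattern with a toy object; Counter, bump, and the component names are illustrative, not from the app:

import uuid

import gradio as gr

class Counter:  # stands in for ImageState
    def __init__(self):
        self.n = 0
    def bump(self, step):
        self.n += step
        return f"count = {self.n}"

def bump(state, step):
    # state is [object, session-id], mirroring the diff's gr.State payload
    counter, session_id = state
    return state, counter.bump(step)

with gr.Blocks() as demo:
    # gr.State's initial value is copied per browser session,
    # so concurrent users no longer share one object
    state = gr.State([Counter(), str(uuid.uuid4())])
    step = gr.Slider(minimum=1, maximum=10, value=1, step=1)
    out = gr.Textbox()
    gr.Button("Bump").click(bump, inputs=[state, step], outputs=[state, out])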
app_backend.py → backend.py
RENAMED
@@ -17,7 +17,9 @@ from img_processing import *
 from img_processing import custom_to_pil
 from loaders import load_default
 import glob
-
+import gc
+
+global log
 log=False
 
 # ic.disable()
@@ -61,6 +63,7 @@ class ImagePromptOptimizer(nn.Module):
                  vqgan,
                  clip,
                  clip_preprocessor,
+                 lpips_fn,
                  iterations=100,
                  lr = 0.01,
                  save_vector=True,
@@ -81,11 +84,8 @@ class ImagePromptOptimizer(nn.Module):
         self.make_grid = make_grid
         self.return_val = return_val
         self.quantize = quantize
-        # self.disc = load_disc(self.device)
         self.lpips_weight = lpips_weight
-        self.perceptual_loss =
-    def disc_loss_fn(self, logits):
-        return -torch.mean(logits)
+        self.perceptual_loss = lpips_fn
     def set_latent(self, latent):
         self.latent = latent.detach().to(self.device)
     def set_params(self, lr, iterations, lpips_weight, reconstruction_steps, attn_mask):
@@ -195,11 +195,6 @@ class ImagePromptOptimizer(nn.Module):
             lpips_input.retain_grad()
             with torch.autocast("cuda"):
                 perceptual_loss = self.perceptual_loss(lpips_input, original_img.clone()) * self.lpips_weight
-                with torch.no_grad():
-                    disc_logits = self.disc(transformed_img)
-                    disc_loss = self.disc_loss_fn(disc_logits)
-                    print(f"disc_loss = {disc_loss}")
-                    disc_loss2 = self.disc(processed_img)
             if log:
                 wandb.log({"Perceptual Loss": perceptual_loss})
             print("LPIPS loss: ", perceptual_loss)
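The rename also changes how the perceptual loss is built: LPIPS is now constructed once in app.py and injected as lpips_fn, rather than each optimizer building its own copy (the old truncated assignment suggests it was previously created inline). A small reference sketch of the lpips package's standard usage, with dummy tensors standing in for real images:

import lpips
import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
lpips_fn = lpips.LPIPS(net="vgg").to(device)  # loads the VGG backbone once

# LPIPS expects NCHW tensors scaled to [-1, 1]
img_a = torch.rand(1, 3, 256, 256, device=device) * 2 - 1
img_b = torch.rand(1, 3, 256, 256, device=device) * 2 - 1
print(lpips_fn(img_a, img_b).item())  # perceptual distance, ~0 for identical images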
configs.py
ADDED
@@ -0,0 +1,15 @@
+import gradio as gr
+def set_small_local():
+    return (gr.Slider.update(value=18), gr.Slider.update(value=0.15), gr.Slider.update(value=5), gr.Slider.update(value=4))
+def set_major_local():
+    return (gr.Slider.update(value=25), gr.Slider.update(value=0.187), gr.Slider.update(value=36.6), gr.Slider.update(value=6))
+def set_major_global():
+    return (gr.Slider.update(value=30), gr.Slider.update(value=0.1), gr.Slider.update(value=1), gr.Slider.update(value=1))
+def set_preset(config_str):
+    choices=["Small Masked Changes (e.g. add lipstick)", "Major Masked Changes (e.g. change hair color or nose size)", "Major Global Changes (e.g. change race / gender"]
+    if config_str == choices[0]:
+        return set_small_local()
+    elif config_str == choices[1]:
+        return set_major_local()
+    elif config_str == choices[2]:
+        return set_major_global()
loaders.py
CHANGED
@@ -23,6 +23,7 @@ def load_default(device):
     sd = torch.load("./model_checkpoints/vqgan_only.pt", map_location=device)
     model.load_state_dict(sd, strict=True)
     model.to(device)
+    del sd
     return model
 
 
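The one-line loaders.py change is a memory fix: load_state_dict copies the tensors into the model, but the loaded dict keeps its own copies alive until the local goes out of scope, so deleting it right away releases the checkpoint sooner. In isolation the pattern looks like this (load_weights is an illustrative name, not from the repo):

import torch

def load_weights(model, path, device):
    sd = torch.load(path, map_location=device)
    model.load_state_dict(sd, strict=True)
    model.to(device)
    del sd  # drop the checkpoint tensors now rather than at function exit
    return model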
masking.py
CHANGED
@@ -13,7 +13,7 @@ from transformers import CLIPModel, CLIPProcessor
 import edit
 # import importlib
 # importlib.reload(edit)
-from app_backend import ImagePromptOptimizer, ImageState, ProcessorGradientFlow
+from backend import ImagePromptOptimizer, ImageState, ProcessorGradientFlow
 from loaders import load_default
 
 device = "cuda"
presets.py
ADDED
@@ -0,0 +1,16 @@
+import gradio as gr
+
+def set_preset(config_str):
+    choices=["Small Masked Changes (e.g. add lipstick)", "Major Masked Changes (e.g. change hair color or nose size)", "Major Global Changes (e.g. change race / gender"]
+    if config_str == choices[0]:
+        return set_small_local()
+    elif config_str == choices[1]:
+        return set_major_local()
+    elif config_str == choices[2]:
+        return set_major_global()
+def set_small_local():
+    return (gr.Slider.update(value=25), gr.Slider.update(value=0.15), gr.Slider.update(value=1), gr.Slider.update(value=4))
+def set_major_local():
+    return (gr.Slider.update(value=25), gr.Slider.update(value=0.25), gr.Slider.update(value=35), gr.Slider.update(value=10))
+def set_major_global():
+    return (gr.Slider.update(value=30), gr.Slider.update(value=0.1), gr.Slider.update(value=2), gr.Slider.update(value=0.2))
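presets.py pairs with the new Preset Configs dropdown in app.py: set_preset returns one gr.Slider.update per output, mapped positionally onto [iterations, learning_rate, lpips_weight, reconstruction_steps]. Note that configs.py defines the same helpers with different values, while app.py imports from presets. A wiring sketch; the slider ranges are copied from the app.py diff and the surrounding Blocks is illustrative:

import gradio as gr

from presets import set_preset

with gr.Blocks() as demo:
    presets = gr.Dropdown(value="Select a preset", label="Preset Configs",
                          choices=["Small Masked Changes (e.g. add lipstick)",
                                   "Major Masked Changes (e.g. change hair color or nose size)",
                                   "Major Global Changes (e.g. change race / gender"])
    iterations = gr.Slider(minimum=10, maximum=60, step=1, value=20)
    learning_rate = gr.Slider(minimum=4e-3, maximum=1, value=1e-1)
    lpips_weight = gr.Slider(minimum=0, maximum=50, value=1)
    reconstruction_steps = gr.Slider(minimum=0, maximum=50, value=3, step=1)
    # the 4-tuple returned by set_preset maps positionally onto outputs
    presets.change(set_preset, inputs=[presets],
                   outputs=[iterations, learning_rate, lpips_weight, reconstruction_steps])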