Erwann Millon committed on
Commit e0f92a0
1 Parent(s): ec39fe8

refactoring and cleanup

Files changed (12)
  1. ImageState.py +118 -74
  2. animation.py +8 -6
  3. app.py +7 -7
  4. backend.py +104 -90
  5. edit.py +17 -12
  6. img_processing.py +40 -36
  7. loaders.py +20 -20
  8. masking.py +21 -23
  9. presets.py +30 -4
  10. prompts.py +31 -7
  11. unwrapped.yaml +0 -37
  12. utils.py +3 -1
ImageState.py CHANGED
@@ -1,183 +1,227 @@
- # from align import align_from_path
import gc
import imageio
import glob
import uuid
from animation import clear_img_dir
- from backend import ImagePromptOptimizer, log
- import importlib
- import gradio as gr
- import matplotlib.pyplot as plt
import torch
import torchvision
import wandb
- from icecream import ic
- from torch import nn
- from torchvision.transforms.functional import resize
- from tqdm import tqdm
- from transformers import CLIPModel, CLIPProcessor
- import lpips
- from backend import get_resized_tensor
from edit import blend_paths
- from img_processing import *
from img_processing import custom_to_pil
- from loaders import load_default

num = 0

- class PromptTransformHistory():
    def __init__(self, iterations) -> None:
        self.iterations = iterations
        self.transforms = []

class ImageState:
-     def __init__(self, vqgan, prompt_optimizer: ImagePromptOptimizer) -> None:
        self.vqgan = vqgan
        self.device = vqgan.device
        self.blend_latent = None
        self.quant = True
        self.path1 = None
        self.path2 = None
        self.transform_history = []
        self.attn_mask = None
        self.prompt_optim = prompt_optimizer
        self._load_vectors()
        self.init_transforms()

    def _load_vectors(self):
-         self.lip_vector = torch.load("./latent_vectors/lipvector.pt", map_location=self.device)
-         self.blue_eyes_vector = torch.load("./latent_vectors/2blue_eyes.pt", map_location=self.device)
-         self.asian_vector = torch.load("./latent_vectors/asian10.pt", map_location=self.device)

    def create_gif(self, total_duration, extend_frames, gif_name="face_edit.gif"):
        images = []
-         folder = self.state_id
        paths = glob.glob(folder + "/*")
        frame_duration = total_duration / len(paths)
        print(len(paths), "frame dur", frame_duration)
        durations = [frame_duration] * len(paths)
        if extend_frames:
-             durations [0] = 1.5
-             durations [-1] = 3
        for file_name in os.listdir(folder):
-             if file_name.endswith('.png'):
                file_path = os.path.join(folder, file_name)
                images.append(imageio.imread(file_path))
        imageio.mimsave(gif_name, images, duration=durations)
        return gif_name

    def init_transforms(self):
        self.blue_eyes = torch.zeros_like(self.lip_vector)
        self.lip_size = torch.zeros_like(self.lip_vector)
        self.asian_transform = torch.zeros_like(self.lip_vector)
        self.current_prompt_transforms = [torch.zeros_like(self.lip_vector)]

    def clear_transforms(self):
-         global num
        self.init_transforms()
        clear_img_dir("./img_history")
-         num = 0
        return self._render_all_transformations()

-     def _apply_vector(self, src, vector):
-         new_latent = torch.lerp(src, src + vector, 1)
-         return new_latent

-     def _decode_latent_to_pil(self, latent):
        current_im = self.vqgan.decode(latent.to(self.device))[0]
        return custom_to_pil(current_im)

    def _get_mask(self, img, mask=None):
        if img and "mask" in img and img["mask"] is not None:
            attn_mask = torchvision.transforms.ToTensor()(img["mask"])
            attn_mask = torch.ceil(attn_mask[0].to(self.device))
            print("mask set successfully")
-             print(type(attn_mask))
-             print(attn_mask.shape)
        else:
            attn_mask = mask
        return attn_mask

    def set_mask(self, img):
        self.attn_mask = self._get_mask(img)
        x = self.attn_mask.clone()
        x = x.detach().cpu()
-         x = torch.clamp(x, -1., 1.)
-         x = (x + 1.)/2.
        x = x.numpy()
        x = (255 * x).astype(np.uint8)
        x = Image.fromarray(x, "L")
        return x

-     @torch.no_grad()
    def _render_all_transformations(self, return_twice=True):
        global num
-         if self.state_id is None:
-             self.state_id = "./img_history/" + str(uuid.uuid4())
-         print("redner all", self.state_id)
-         current_vector_transforms = (self.blue_eyes, self.lip_size, self.asian_transform, sum(self.current_prompt_transforms))
        new_latent = self.blend_latent + sum(current_vector_transforms)
        if self.quant:
            new_latent, _, _ = self.vqgan.quantize(new_latent.to(self.device))
-         image = self._decode_latent_to_pil(new_latent)
-         img_dir = self.state_id
-         if not os.path.exists("img_history"):
-             os.mkdir("./img_history")
-         if not os.path.exists(img_dir):
-             os.mkdir(img_dir)
-         image.save(f"{img_dir}/img_{num:06}.png")
        num += 1
        return (image, image) if return_twice else image

    def apply_rb_vector(self, weight):
        self.blue_eyes = weight * self.blue_eyes_vector
        return self._render_all_transformations()

    def apply_lip_vector(self, weight):
        self.lip_size = weight * self.lip_vector
        return self._render_all_transformations()

    def update_quant(self, val):
        self.quant = val
        return self._render_all_transformations()

    def apply_asian_vector(self, weight):
        self.asian_transform = weight * self.asian_vector
        return self._render_all_transformations()

    def update_images(self, path1, path2, blend_weight):
        if path1 is None and path2 is None:
            return None
-         if path1 is None: path1 = path2
-         if path2 is None: path2 = path1
        self.path1, self.path2 = path1, path2
-         if self.state_id:
-             clear_img_dir(self.state_id)
        return self.blend(blend_weight)

-     @torch.no_grad()
    def blend(self, weight):
-         _, latent = blend_paths(self.vqgan, self.path1, self.path2, weight=weight, show=False, device=self.device)
        self.blend_latent = latent
        return self._render_all_transformations()

-     @torch.no_grad()
    def rewind(self, index):
        if not self.transform_history:
-             print("no history")
            return self._render_all_transformations()
        prompt_transform = self.transform_history[-1]
        latent_index = int(index / 100 * (prompt_transform.iterations - 1))
        print(latent_index)
-         self.current_prompt_transforms[-1] = prompt_transform.transforms[latent_index].to(self.device)
        return self._render_all_transformations()

-     def apply_prompts(self, positive_prompts, negative_prompts, lr, iterations, lpips_weight, reconstruction_steps):
-         if self.state_id is None:
-             self.state_id = "./img_history/" + str(uuid.uuid4())
-         transform_log = PromptTransformHistory(iterations + reconstruction_steps)
-         transform_log.transforms.append(torch.zeros_like(self.blend_latent, requires_grad=False))
-         self.current_prompt_transforms.append(torch.zeros_like(self.blend_latent, requires_grad=False))
        if log:
-             wandb.init(reinit=True, project="face-editor")
-             wandb.config.update({"Positive Prompts": positive_prompts})
-             wandb.config.update({"Negative Prompts": negative_prompts})
-             wandb.config.update(dict(
-                 lr=lr,
-                 iterations=iterations,
-                 lpips_weight=lpips_weight
-             ))
        positive_prompts = [prompt.strip() for prompt in positive_prompts.split("|")]
        negative_prompts = [prompt.strip() for prompt in negative_prompts.split("|")]
-         self.prompt_optim.set_params(lr, iterations, lpips_weight, attn_mask=self.attn_mask, reconstruction_steps=reconstruction_steps)
-         for i, transform in enumerate(self.prompt_optim.optimize(self.blend_latent,
-                                                                   positive_prompts,
-                                                                   negative_prompts)):
            transform_log.transforms.append(transform.detach().cpu())
            self.current_prompt_transforms[-1] = transform
-             with torch.no_grad():
                image = self._render_all_transformations(return_twice=False)
            if log:
                wandb.log({"image": wandb.Image(image)})
@@ -187,4 +231,4 @@ class ImageState:
        self.attn_mask = None
        self.transform_history.append(transform_log)
        gc.collect()
-         torch.cuda.empty_cache()

+ import numpy as np
import gc
+ import os
import imageio
import glob
import uuid
from animation import clear_img_dir
+ from backend import ImagePromptEditor, log
import torch
import torchvision
import wandb
from edit import blend_paths
from img_processing import custom_to_pil
+ from PIL import Image
+
num = 0

+
+ class PromptTransformHistory:
    def __init__(self, iterations) -> None:
        self.iterations = iterations
        self.transforms = []

+
class ImageState:
+     def __init__(self, vqgan, prompt_optimizer: ImagePromptEditor) -> None:
        self.vqgan = vqgan
        self.device = vqgan.device
        self.blend_latent = None
        self.quant = True
        self.path1 = None
        self.path2 = None
+         self.img_dir = "./img_history"
+         if not os.path.exists(self.img_dir):
+             os.mkdir(self.img_dir)
        self.transform_history = []
        self.attn_mask = None
        self.prompt_optim = prompt_optimizer
        self._load_vectors()
        self.init_transforms()
+
    def _load_vectors(self):
+         self.lip_vector = torch.load(
+             "./latent_vectors/lipvector.pt", map_location=self.device
+         )
+         self.blue_eyes_vector = torch.load(
+             "./latent_vectors/2blue_eyes.pt", map_location=self.device
+         )
+         self.asian_vector = torch.load(
+             "./latent_vectors/asian10.pt", map_location=self.device
+         )
+
    def create_gif(self, total_duration, extend_frames, gif_name="face_edit.gif"):
        images = []
+         folder = self.img_dir
        paths = glob.glob(folder + "/*")
        frame_duration = total_duration / len(paths)
        print(len(paths), "frame dur", frame_duration)
        durations = [frame_duration] * len(paths)
        if extend_frames:
+             durations[0] = 1.5
+             durations[-1] = 3
        for file_name in os.listdir(folder):
+             if file_name.endswith(".png"):
                file_path = os.path.join(folder, file_name)
                images.append(imageio.imread(file_path))
        imageio.mimsave(gif_name, images, duration=durations)
        return gif_name
+
    def init_transforms(self):
        self.blue_eyes = torch.zeros_like(self.lip_vector)
        self.lip_size = torch.zeros_like(self.lip_vector)
        self.asian_transform = torch.zeros_like(self.lip_vector)
        self.current_prompt_transforms = [torch.zeros_like(self.lip_vector)]
+
    def clear_transforms(self):
        self.init_transforms()
        clear_img_dir("./img_history")
        return self._render_all_transformations()
+
+     def _latent_to_pil(self, latent):
        current_im = self.vqgan.decode(latent.to(self.device))[0]
        return custom_to_pil(current_im)
+
    def _get_mask(self, img, mask=None):
        if img and "mask" in img and img["mask"] is not None:
            attn_mask = torchvision.transforms.ToTensor()(img["mask"])
            attn_mask = torch.ceil(attn_mask[0].to(self.device))
            print("mask set successfully")
        else:
            attn_mask = mask
        return attn_mask
+
    def set_mask(self, img):
        self.attn_mask = self._get_mask(img)
        x = self.attn_mask.clone()
        x = x.detach().cpu()
+         x = torch.clamp(x, -1.0, 1.0)
+         x = (x + 1.0) / 2.0
        x = x.numpy()
        x = (255 * x).astype(np.uint8)
        x = Image.fromarray(x, "L")
        return x
+
+     @torch.inference_mode()
    def _render_all_transformations(self, return_twice=True):
        global num
+         current_vector_transforms = (
+             self.blue_eyes,
+             self.lip_size,
+             self.asian_transform,
+             sum(self.current_prompt_transforms),
+         )
        new_latent = self.blend_latent + sum(current_vector_transforms)
        if self.quant:
            new_latent, _, _ = self.vqgan.quantize(new_latent.to(self.device))
+         image = self._latent_to_pil(new_latent)
+         image.save(f"{self.img_dir}/img_{num:06}.png")
        num += 1
        return (image, image) if return_twice else image
+
    def apply_rb_vector(self, weight):
        self.blue_eyes = weight * self.blue_eyes_vector
        return self._render_all_transformations()
+
    def apply_lip_vector(self, weight):
        self.lip_size = weight * self.lip_vector
        return self._render_all_transformations()
+
    def update_quant(self, val):
        self.quant = val
        return self._render_all_transformations()
+
    def apply_asian_vector(self, weight):
        self.asian_transform = weight * self.asian_vector
        return self._render_all_transformations()
+
    def update_images(self, path1, path2, blend_weight):
        if path1 is None and path2 is None:
            return None
+
+         # Duplicate paths if one is empty
+         if path1 is None:
+             path1 = path2
+         if path2 is None:
+             path2 = path1
+
        self.path1, self.path2 = path1, path2
+         if self.img_dir:
+             clear_img_dir(self.img_dir)
        return self.blend(blend_weight)
+
+     @torch.inference_mode()
    def blend(self, weight):
+         _, latent = blend_paths(
+             self.vqgan,
+             self.path1,
+             self.path2,
+             weight=weight,
+             show=False,
+             device=self.device,
+         )
        self.blend_latent = latent
        return self._render_all_transformations()
+
+     @torch.inference_mode()
    def rewind(self, index):
        if not self.transform_history:
+             print("No history")
            return self._render_all_transformations()
        prompt_transform = self.transform_history[-1]
        latent_index = int(index / 100 * (prompt_transform.iterations - 1))
        print(latent_index)
+         self.current_prompt_transforms[-1] = prompt_transform.transforms[
+             latent_index
+         ].to(self.device)
        return self._render_all_transformations()
+
+     def _init_logging(lr, iterations, lpips_weight, positive_prompts, negative_prompts):
+         wandb.init(reinit=True, project="face-editor")
+         wandb.config.update({"Positive Prompts": positive_prompts})
+         wandb.config.update({"Negative Prompts": negative_prompts})
+         wandb.config.update(
+             dict(lr=lr, iterations=iterations, lpips_weight=lpips_weight)
+         )
+
+     def apply_prompts(
+         self,
+         positive_prompts,
+         negative_prompts,
+         lr,
+         iterations,
+         lpips_weight,
+         reconstruction_steps,
+     ):
        if log:
+             self._init_logging(
+                 lr, iterations, lpips_weight, positive_prompts, negative_prompts
+             )
+         transform_log = PromptTransformHistory(iterations + reconstruction_steps)
+         transform_log.transforms.append(
+             torch.zeros_like(self.blend_latent, requires_grad=False)
+         )
+         self.current_prompt_transforms.append(
+             torch.zeros_like(self.blend_latent, requires_grad=False)
+         )
        positive_prompts = [prompt.strip() for prompt in positive_prompts.split("|")]
        negative_prompts = [prompt.strip() for prompt in negative_prompts.split("|")]
+         self.prompt_optim.set_params(
+             lr,
+             iterations,
+             lpips_weight,
+             attn_mask=self.attn_mask,
+             reconstruction_steps=reconstruction_steps,
+         )
+
+         for i, transform in enumerate(
+             self.prompt_optim.optimize(
+                 self.blend_latent, positive_prompts, negative_prompts
+             )
+         ):
            transform_log.transforms.append(transform.detach().cpu())
            self.current_prompt_transforms[-1] = transform
+             with torch.inference_mode():
                image = self._render_all_transformations(return_twice=False)
            if log:
                wandb.log({"image": wandb.Image(image)})
        self.attn_mask = None
        self.transform_history.append(transform_log)
        gc.collect()
+         torch.cuda.empty_cache()
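For orientation: the refactored ImageState applies edits by summing precomputed latent direction vectors onto the blended latent and decoding the result (see _render_all_transformations above). Below is a minimal, self-contained sketch of that latent-arithmetic pattern; the decode function, tensor shapes and names are placeholders for this sketch only, not part of the repo.

import torch

def decode(latent):
    # Stand-in for vqgan.decode(); it exists only so this sketch runs on its own.
    return torch.tanh(latent)

blend_latent = torch.randn(1, 256, 16, 16)   # placeholder for the blended VQGAN latent
lip_vector = torch.randn_like(blend_latent)  # placeholder for a learned edit direction
weight = 0.8

# Same idea as _render_all_transformations(): add the weighted edit vector(s) to the
# blended latent, then decode the edited latent into an image.
edited = decode(blend_latent + weight * lip_vector)
print(edited.shape)  # torch.Size([1, 256, 16, 16])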
animation.py CHANGED
@@ -8,21 +8,23 @@ def clear_img_dir(img_dir):
        os.mkdir("img_history")
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
-     for filename in glob.glob(img_dir+"/*"):
        os.remove(filename)


- def create_gif(total_duration, extend_frames, folder="./img_history", gif_name="face_edit.gif"):
    images = []
    paths = glob.glob(folder + "/*")
    frame_duration = total_duration / len(paths)
    print(len(paths), "frame dur", frame_duration)
    durations = [frame_duration] * len(paths)
    if extend_frames:
-         durations [0] = 1.5
-         durations [-1] = 3
    for file_name in os.listdir(folder):
-         if file_name.endswith('.png'):
            file_path = os.path.join(folder, file_name)
            images.append(imageio.imread(file_path))
    imageio.mimsave(gif_name, images, duration=durations)
@@ -30,4 +32,4 @@ def create_gif(total_duration, extend_frames, folder="./img_history", gif_name="


if __name__ == "__main__":
-     create_gif()

        os.mkdir("img_history")
    if not os.path.exists(img_dir):
        os.mkdir(img_dir)
+     for filename in glob.glob(img_dir + "/*"):
        os.remove(filename)


+ def create_gif(
+     total_duration, extend_frames, folder="./img_history", gif_name="face_edit.gif"
+ ):
    images = []
    paths = glob.glob(folder + "/*")
    frame_duration = total_duration / len(paths)
    print(len(paths), "frame dur", frame_duration)
    durations = [frame_duration] * len(paths)
    if extend_frames:
+         durations[0] = 1.5
+         durations[-1] = 3
    for file_name in os.listdir(folder):
+         if file_name.endswith(".png"):
            file_path = os.path.join(folder, file_name)
            images.append(imageio.imread(file_path))
    imageio.mimsave(gif_name, images, duration=durations)


if __name__ == "__main__":
+     create_gif()
app.py CHANGED
@@ -14,7 +14,7 @@ from transformers import CLIPModel, CLIPProcessor
from lpips import LPIPS

import edit
- from backend import ImagePromptOptimizer, ProcessorGradientFlow
from ImageState import ImageState
from loaders import load_default
# from animation import create_gif
@@ -29,14 +29,14 @@ processor = ProcessorGradientFlow(device=device)
# clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
lpips_fn = LPIPS(net='vgg').to(device)
clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
- promptoptim = ImagePromptOptimizer(vqgan, clip, processor, lpips_fn=lpips_fn, quantize=True)
def set_img_from_example(state, img):
    return state.update_images(img, img, 0)
def get_cleared_mask():
    return gr.Image.update(value=None)
- # mask.clear()
-
class StateWrapper:
    def create_gif(state, *args, **kwargs):
        return state, state[0].create_gif(*args, **kwargs)
    def apply_asian_vector(state, *args, **kwargs):
@@ -46,7 +46,6 @@ class StateWrapper:
    def apply_lip_vector(state, *args, **kwargs):
        return state, *state[0].apply_lip_vector(*args, **kwargs)
    def apply_prompts(state, *args, **kwargs):
-         print(state[1])
        for image in state[0].apply_prompts(*args, **kwargs):
            yield state, *image
    def apply_rb_vector(state, *args, **kwargs):
@@ -69,9 +68,10 @@ class StateWrapper:
        return state, *state[0].update_images(*args, **kwargs)
    def update_requant(state, *args, **kwargs):
        return state, *state[0].update_requant(*args, **kwargs)
with gr.Blocks(css="styles.css") as demo:
-     # id = gr.State(str(uuid.uuid4()))
-     state = gr.State([ImageState(vqgan, promptoptim), str(uuid.uuid4())])
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():

from lpips import LPIPS

import edit
+ from backend import ImagePromptEditor, ProcessorGradientFlow
from ImageState import ImageState
from loaders import load_default
# from animation import create_gif

# clip = CLIPModel.from_pretrained("openai/clip-vit-large-patch14")
lpips_fn = LPIPS(net='vgg').to(device)
clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
+ promptoptim = ImagePromptEditor(vqgan, clip, processor, lpips_fn=lpips_fn, quantize=True)
+
def set_img_from_example(state, img):
    return state.update_images(img, img, 0)
def get_cleared_mask():
    return gr.Image.update(value=None)
class StateWrapper:
+     """This extremely ugly code is a hacky fix to allow con"""
    def create_gif(state, *args, **kwargs):
        return state, state[0].create_gif(*args, **kwargs)
    def apply_asian_vector(state, *args, **kwargs):

    def apply_lip_vector(state, *args, **kwargs):
        return state, *state[0].apply_lip_vector(*args, **kwargs)
    def apply_prompts(state, *args, **kwargs):
        for image in state[0].apply_prompts(*args, **kwargs):
            yield state, *image
    def apply_rb_vector(state, *args, **kwargs):

        return state, *state[0].update_images(*args, **kwargs)
    def update_requant(state, *args, **kwargs):
        return state, *state[0].update_requant(*args, **kwargs)
+
+
with gr.Blocks(css="styles.css") as demo:
+     state = gr.State([ImageState(vqgan, promptoptim)])
    with gr.Row():
        with gr.Column(scale=1):
            with gr.Row():
backend.py CHANGED
@@ -1,77 +1,65 @@
- # from functools import cache
- import importlib
-
- import gradio as gr
import matplotlib.pyplot as plt
import torch
import torchvision
import wandb
- from icecream import ic
from torch import nn
- from torchvision.transforms.functional import resize
from tqdm import tqdm
- from transformers import CLIPModel, CLIPProcessor
- import lpips
- from edit import blend_paths
- from img_processing import *
- from img_processing import custom_to_pil
- from loaders import load_default
- import glob
- import gc

global log
- log=False
-
- # ic.disable()
- # ic.enable()
- def get_resized_tensor(x):
-     if len(x.shape) == 2:
-         re = x.unsqueeze(0)
-     else: re = x
-     re = resize(re, (10, 10))
-     return re
- class ProcessorGradientFlow():
    """
    This wraps the huggingface CLIP processor to allow backprop through the image processing step.
-     The original processor forces conversion to numpy then PIL images, which is faster for image processing but breaks gradient flow.
    """
    def __init__(self, device="cuda") -> None:
        self.device = device
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
        self.image_mean = [0.48145466, 0.4578275, 0.40821073]
        self.image_std = [0.26862954, 0.26130258, 0.27577711]
        self.normalize = torchvision.transforms.Normalize(
-             self.image_mean,
-             self.image_std
        )
        self.resize = torchvision.transforms.Resize(224)
        self.center_crop = torchvision.transforms.CenterCrop(224)
    def preprocess_img(self, images):
        images = self.center_crop(images)
        images = self.resize(images)
        images = self.center_crop(images)
        images = self.normalize(images)
        return images
    def __call__(self, images=[], **kwargs):
        processed_inputs = self.processor(**kwargs)
        processed_inputs["pixel_values"] = self.preprocess_img(images)
-         processed_inputs = {key:value.to(self.device) for (key, value) in processed_inputs.items()}
        return processed_inputs

- class ImagePromptOptimizer(nn.Module):
-     def __init__(self,
-                  vqgan,
-                  clip,
-                  clip_preprocessor,
-                  lpips_fn,
-                  iterations=100,
-                  lr = 0.01,
-                  save_vector=True,
-                  return_val="vector",
-                  quantize=True,
-                  make_grid=False,
-                  lpips_weight = 6.2) -> None:
-
        super().__init__()
        self.latent = None
        self.device = vqgan.device
@@ -86,14 +74,17 @@ class ImagePromptOptimizer(nn.Module):
        self.quantize = quantize
        self.lpips_weight = lpips_weight
        self.perceptual_loss = lpips_fn
    def set_latent(self, latent):
        self.latent = latent.detach().to(self.device)
    def set_params(self, lr, iterations, lpips_weight, reconstruction_steps, attn_mask):
        self._attn_mask = attn_mask
        self.iterations = iterations
        self.lr = lr
        self.lpips_weight = lpips_weight
        self.reconstruction_steps = reconstruction_steps
    def forward(self, vector):
        base_latent = self.latent.detach().requires_grad_()
        trans_latent = base_latent + vector
@@ -103,19 +94,22 @@ class ImagePromptOptimizer(nn.Module):
            z_q = trans_latent
        dec = self.vqgan.decode(z_q)
        return dec
    def _get_clip_similarity(self, prompts, image, weights=None):
        if isinstance(prompts, str):
            prompts = [prompts]
        elif not isinstance(prompts, list):
            raise TypeError("Provide prompts as string or list of strings")
-         clip_inputs = self.clip_preprocessor(text=prompts,
-                                              images=image, return_tensors="pt", padding=True)
        clip_outputs = self.clip(**clip_inputs)
        similarity_logits = clip_outputs.logits_per_image
        if weights:
            similarity_logits *= weights
        return similarity_logits.sum()
-     def get_similarity_loss(self, pos_prompts, neg_prompts, image):
        pos_logits = self._get_clip_similarity(pos_prompts, image)
        if neg_prompts:
            neg_logits = self._get_clip_similarity(neg_prompts, image)
@@ -123,6 +117,7 @@ class ImagePromptOptimizer(nn.Module):
            neg_logits = torch.tensor([1], device=self.device)
        loss = -torch.log(pos_logits) + torch.log(neg_logits)
        return loss
    def visualize(self, processed_img):
        if self.make_grid:
            self.index += 1
@@ -131,74 +126,93 @@ class ImagePromptOptimizer(nn.Module):
        else:
            plt.imshow(get_pil(processed_img[0]).detach().cpu())
            plt.show()
    def _attn_mask(self, grad):
        newgrad = grad
        if self._attn_mask is not None:
            newgrad = grad * (self._attn_mask)
        return newgrad
    def _attn_mask_inverse(self, grad):
        newgrad = grad
        if self._attn_mask is not None:
            newgrad = grad * ((self._attn_mask - 1) * -1)
        return newgrad
    def _get_next_inputs(self, transformed_img):
-         processed_img = loop_post_process(transformed_img) #* self.attn_mask
        processed_img.retain_grad()
        lpips_input = processed_img.clone()
        lpips_input.register_hook(self._attn_mask_inverse)
        lpips_input.retain_grad()
        clip_input = processed_img.clone()
        clip_input.register_hook(self._attn_mask)
        clip_input.retain_grad()
-         return processed_img, lpips_input, clip_input

    def optimize(self, latent, pos_prompts, neg_prompts):
        self.set_latent(latent)
-         transformed_img = self(torch.zeros_like(self.latent, requires_grad=True, device=self.device))
        original_img = loop_post_process(transformed_img)
        vector = torch.randn_like(self.latent, requires_grad=True, device=self.device)
        optim = torch.optim.Adam([vector], lr=self.lr)
-         if self.make_grid:
-             plt.figure(figsize=(35, 25))
-             self.index = 1
        for i in tqdm(range(self.iterations)):
-             optim.zero_grad()
-             transformed_img = self(vector)
-             processed_img, lpips_input, clip_input = self._get_next_inputs(transformed_img)
-             with torch.autocast("cuda"):
-                 clip_loss = self.get_similarity_loss(pos_prompts, neg_prompts, clip_input)
-                 print("CLIP loss", clip_loss)
-                 perceptual_loss = self.perceptual_loss(lpips_input, original_img.clone()) * self.lpips_weight
-                 print("LPIPS loss: ", perceptual_loss)
-                 if log:
-                     wandb.log({"Perceptual Loss": perceptual_loss})
-                     wandb.log({"CLIP Loss": clip_loss})
-             clip_loss.backward(retain_graph=True)
-             perceptual_loss.backward(retain_graph=True)
-             p2 = processed_img.grad
-             print("Sum Loss", perceptual_loss + clip_loss)
-             optim.step()
-             # if i % self.iterations // 10 == 0:
-             #     self.visualize(transformed_img)
-             yield vector
-         if self.make_grid:
-             plt.savefig(f"plot {pos_prompts[0]}.png")
-             plt.show()
-         print("lpips solo op")
        for i in range(self.reconstruction_steps):
-             optim.zero_grad()
-             transformed_img = self(vector)
-             processed_img = loop_post_process(transformed_img) #* self.attn_mask
-             processed_img.retain_grad()
-             lpips_input = processed_img.clone()
-             lpips_input.register_hook(self._attn_mask_inverse)
-             lpips_input.retain_grad()
-             with torch.autocast("cuda"):
-                 perceptual_loss = self.perceptual_loss(lpips_input, original_img.clone()) * self.lpips_weight
-                 if log:
-                     wandb.log({"Perceptual Loss": perceptual_loss})
-                 print("LPIPS loss: ", perceptual_loss)
-             perceptual_loss.backward(retain_graph=True)
-             optim.step()
-             yield vector
        yield vector if self.return_val == "vector" else self.latent + vector

import matplotlib.pyplot as plt
import torch
import torchvision
import wandb
from torch import nn
from tqdm import tqdm
+ from transformers import CLIPProcessor
+ from img_processing import get_pil, loop_post_process
+

global log
+ log = False
+
+ class ProcessorGradientFlow:
    """
    This wraps the huggingface CLIP processor to allow backprop through the image processing step.
+     The original processor forces conversion to numpy then PIL images, which is faster for image processing but breaks gradient flow.
    """
+
    def __init__(self, device="cuda") -> None:
        self.device = device
        self.processor = CLIPProcessor.from_pretrained("openai/clip-vit-large-patch14")
        self.image_mean = [0.48145466, 0.4578275, 0.40821073]
        self.image_std = [0.26862954, 0.26130258, 0.27577711]
        self.normalize = torchvision.transforms.Normalize(
+             self.image_mean, self.image_std
        )
        self.resize = torchvision.transforms.Resize(224)
        self.center_crop = torchvision.transforms.CenterCrop(224)
+
    def preprocess_img(self, images):
        images = self.center_crop(images)
        images = self.resize(images)
        images = self.center_crop(images)
        images = self.normalize(images)
        return images
+
    def __call__(self, images=[], **kwargs):
        processed_inputs = self.processor(**kwargs)
        processed_inputs["pixel_values"] = self.preprocess_img(images)
+         processed_inputs = {
+             key: value.to(self.device) for (key, value) in processed_inputs.items()
+         }
        return processed_inputs

+
+ class ImagePromptEditor(nn.Module):
+     def __init__(
+         self,
+         vqgan,
+         clip,
+         clip_preprocessor,
+         lpips_fn,
+         iterations=100,
+         lr=0.01,
+         save_vector=True,
+         return_val="vector",
+         quantize=True,
+         make_grid=False,
+         lpips_weight=6.2,
+     ) -> None:
+
        super().__init__()
        self.latent = None
        self.device = vqgan.device

        self.quantize = quantize
        self.lpips_weight = lpips_weight
        self.perceptual_loss = lpips_fn
+
    def set_latent(self, latent):
        self.latent = latent.detach().to(self.device)
+
    def set_params(self, lr, iterations, lpips_weight, reconstruction_steps, attn_mask):
        self._attn_mask = attn_mask
        self.iterations = iterations
        self.lr = lr
        self.lpips_weight = lpips_weight
        self.reconstruction_steps = reconstruction_steps
+
    def forward(self, vector):
        base_latent = self.latent.detach().requires_grad_()
        trans_latent = base_latent + vector

            z_q = trans_latent
        dec = self.vqgan.decode(z_q)
        return dec
+
    def _get_clip_similarity(self, prompts, image, weights=None):
        if isinstance(prompts, str):
            prompts = [prompts]
        elif not isinstance(prompts, list):
            raise TypeError("Provide prompts as string or list of strings")
+         clip_inputs = self.clip_preprocessor(
+             text=prompts, images=image, return_tensors="pt", padding=True
+         )
        clip_outputs = self.clip(**clip_inputs)
        similarity_logits = clip_outputs.logits_per_image
        if weights:
            similarity_logits *= weights
        return similarity_logits.sum()
+
+     def _get_CLIP_loss(self, pos_prompts, neg_prompts, image):
        pos_logits = self._get_clip_similarity(pos_prompts, image)
        if neg_prompts:
            neg_logits = self._get_clip_similarity(neg_prompts, image)

            neg_logits = torch.tensor([1], device=self.device)
        loss = -torch.log(pos_logits) + torch.log(neg_logits)
        return loss
+
    def visualize(self, processed_img):
        if self.make_grid:
            self.index += 1

        else:
            plt.imshow(get_pil(processed_img[0]).detach().cpu())
            plt.show()
+
    def _attn_mask(self, grad):
        newgrad = grad
        if self._attn_mask is not None:
            newgrad = grad * (self._attn_mask)
        return newgrad
+
    def _attn_mask_inverse(self, grad):
        newgrad = grad
        if self._attn_mask is not None:
            newgrad = grad * ((self._attn_mask - 1) * -1)
        return newgrad
+
    def _get_next_inputs(self, transformed_img):
+         processed_img = loop_post_process(transformed_img)  # * self.attn_mask
        processed_img.retain_grad()
+
        lpips_input = processed_img.clone()
        lpips_input.register_hook(self._attn_mask_inverse)
        lpips_input.retain_grad()
+
        clip_input = processed_img.clone()
        clip_input.register_hook(self._attn_mask)
        clip_input.retain_grad()
+
+         return (processed_img, lpips_input, clip_input)
+
+     def _optimize_CLIP_LPIPS(self, optim, original_img, vector, pos_prompts, neg_prompts):
+         optim.zero_grad()
+         transformed_img = self(vector)
+         processed_img, lpips_input, clip_input = self._get_next_inputs(
+             transformed_img
+         )
+         with torch.autocast("cuda"):
+             clip_loss = self._get_CLIP_loss(pos_prompts, neg_prompts, clip_input)
+             print("CLIP loss", clip_loss)
+             perceptual_loss = (
+                 self.perceptual_loss(lpips_input, original_img.clone())
+                 * self.lpips_weight
+             )
+             print("LPIPS loss: ", perceptual_loss)
+             print("Sum Loss", perceptual_loss + clip_loss)
+         if log:
+             wandb.log({"Perceptual Loss": perceptual_loss})
+             wandb.log({"CLIP Loss": clip_loss})
+
+         # These gradients will be masked if attn_mask has been set
+         clip_loss.backward(retain_graph=True)
+         perceptual_loss.backward(retain_graph=True)
+
+         optim.step()
+         yield vector
+
+     def _optimize_LPIPS(self, vector, original_img, optim):
+         optim.zero_grad()
+         transformed_img = self(vector)
+         processed_img = loop_post_process(transformed_img)  # * self.attn_mask
+         processed_img.retain_grad()
+
+         lpips_input = processed_img.clone()
+         lpips_input.register_hook(self._attn_mask_inverse)
+         lpips_input.retain_grad()
+         with torch.autocast("cuda"):
+             perceptual_loss = (
+                 self.perceptual_loss(lpips_input, original_img.clone())
+                 * self.lpips_weight
+             )
+             if log:
+                 wandb.log({"Perceptual Loss": perceptual_loss})
+             print("LPIPS loss: ", perceptual_loss)
+         perceptual_loss.backward(retain_graph=True)
+         optim.step()
+         yield vector

    def optimize(self, latent, pos_prompts, neg_prompts):
        self.set_latent(latent)
+         transformed_img = self(
+             torch.zeros_like(self.latent, requires_grad=True, device=self.device)
+         )
        original_img = loop_post_process(transformed_img)
        vector = torch.randn_like(self.latent, requires_grad=True, device=self.device)
        optim = torch.optim.Adam([vector], lr=self.lr)
+
        for i in tqdm(range(self.iterations)):
+             yield self._optimize_CLIP_LPIPS(optim, original_img, vector, pos_prompts, neg_prompts)
+
+         print("Running LPIPS optim only")
        for i in range(self.reconstruction_steps):
+             yield self._optimize_LPIPS(vector, original_img, transformed_img, optim)
        yield vector if self.return_val == "vector" else self.latent + vector
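ProcessorGradientFlow's docstring above states the point of the class: keep CLIP preprocessing in torch ops so gradients flow back to the image instead of being cut off by a numpy/PIL round trip. A rough standalone sketch of that idea using the same normalization constants; the random pixel tensor and the mean() loss are placeholders standing in for a real image and the CLIP similarity loss, not part of the repo.

import torch
import torchvision

# Same CLIP normalization constants as ProcessorGradientFlow above.
normalize = torchvision.transforms.Normalize(
    [0.48145466, 0.4578275, 0.40821073],
    [0.26862954, 0.26130258, 0.27577711],
)
resize = torchvision.transforms.Resize(224)
center_crop = torchvision.transforms.CenterCrop(224)

pixels = torch.rand(1, 3, 256, 256, requires_grad=True)  # placeholder image batch
processed = normalize(center_crop(resize(pixels)))       # all tensor ops, no PIL round trip
loss = processed.mean()                                  # stand-in for a CLIP similarity loss
loss.backward()
print(pixels.grad is not None)                           # True: gradients reach the raw pixels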
edit.py CHANGED
@@ -12,7 +12,7 @@ import PIL
import taming
import torch

- from loaders import load_config
from utils import get_device


@@ -25,11 +25,14 @@ def get_embedding(model, path=None, img=None, device="cpu"):
    z, _, [_, _, indices] = model.encode(x_processed)
    return z

-
- def blend_paths(model, path1, path2, quantize=False, weight=0.5, show=True, device="cuda"):
    x = preprocess(PIL.Image.open(path1), target_image_size=256).to(device)
    y = preprocess(PIL.Image.open(path2), target_image_size=256).to(device)
-     x_latent, y_latent = get_embedding(model, path=path1, device=device), get_embedding(model, path=path2, device=device)
    z = torch.lerp(x_latent, y_latent, weight)
    if quantize:
        z = model.quantize(z)[0]
@@ -45,14 +48,16 @@ def blend_paths(model, path1, path2, quantize=False, weight=0.5, show=True, devi
        plt.show()
    return custom_to_pil(decoded), z

if __name__ == "__main__":
    device = get_device()
-     ckpt_path = "logs/2021-04-23T18-11-19_celebahq_transformer/checkpoints/last.ckpt"
-     conf_path = "./unwrapped.yaml"
-     config = load_config(conf_path, display=False)
-     model = taming.models.vqgan.VQModel(**config.model.params)
-     sd = torch.load("./vqgan_only.pt", map_location="mps")
-     model.load_state_dict(sd, strict=True)
    model.to(device)
-     blend_paths(model, "./test_data/face.jpeg", "./test_data/face2.jpeg", quantize=False, weight=.5)
-     plt.show()

import taming
import torch

+ from loaders import load_config, load_default
from utils import get_device


    z, _, [_, _, indices] = model.encode(x_processed)
    return z

+
+ def blend_paths(
+     model, path1, path2, quantize=False, weight=0.5, show=True, device="cuda"
+ ):
    x = preprocess(PIL.Image.open(path1), target_image_size=256).to(device)
    y = preprocess(PIL.Image.open(path2), target_image_size=256).to(device)
+     x_latent = get_embedding(model, path=path1, device=device)
+     y_latent = get_embedding(model, path=path2, device=device)
    z = torch.lerp(x_latent, y_latent, weight)
    if quantize:
        z = model.quantize(z)[0]

        plt.show()
    return custom_to_pil(decoded), z

+
if __name__ == "__main__":
    device = get_device()
+     model = load_default(device)
    model.to(device)
+     blend_paths(
+         model,
+         "./test_data/face.jpeg",
+         "./test_data/face2.jpeg",
+         quantize=False,
+         weight=0.5,
+     )
+     plt.show()
img_processing.py CHANGED
@@ -1,12 +1,9 @@
import io
- import os
- import sys

import numpy as np
import PIL
import requests
import torch
- import torch.nn.functional as F
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from PIL import Image, ImageDraw, ImageFont
@@ -20,10 +17,10 @@ def download_image(url):

def preprocess(img, target_image_size=256, map_dalle=False):
    s = min(img.size)
-
    if s < target_image_size:
-         raise ValueError(f'min dim for image {s} < {target_image_size}')
-
    r = target_image_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)
@@ -31,42 +28,49 @@ def preprocess(img, target_image_size=256, map_dalle=False):
    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return img

def preprocess_vqgan(x):
-     x = 2. * x - 1.
-     return x

def custom_to_pil(x, process=True, mode="RGB"):
-     x = x.detach().cpu()
-     if process:
-         x = torch.clamp(x, -1., 1.)
-         x = (x + 1.)/2.
-     x = x.permute(1,2,0).numpy()
-     if process:
-         x = (255*x).astype(np.uint8)
-     x = Image.fromarray(x)
-     if not x.mode == mode:
-         x = x.convert(mode)
-     return x

def get_pil(x):
-     x = torch.clamp(x, -1., 1.)
-     x = (x + 1.)/2.
-     x = x.permute(1,2,0)
-     return x

def loop_post_process(x):
-     x = get_pil(x.squeeze())
-     return x.permute(2, 0, 1).unsqueeze(0)

def stack_reconstructions(input, x0, x1, x2, x3, titles=[]):
-     assert input.size == x1.size == x2.size == x3.size
-     w, h = input.size[0], input.size[1]
-     img = Image.new("RGB", (5*w, h))
-     img.paste(input, (0,0))
-     img.paste(x0, (1*w,0))
-     img.paste(x1, (2*w,0))
-     img.paste(x2, (3*w,0))
-     img.paste(x3, (4*w,0))
-     for i, title in enumerate(titles):
-         ImageDraw.Draw(img).text((i*w, 0), f'{title}', (255, 255, 255), font=font) # coordinates, text, color, font
-     return img

import io

import numpy as np
import PIL
import requests
import torch
import torchvision.transforms as T
import torchvision.transforms.functional as TF
from PIL import Image, ImageDraw, ImageFont


def preprocess(img, target_image_size=256, map_dalle=False):
    s = min(img.size)
+
    if s < target_image_size:
+         raise ValueError(f"min dim for image {s} < {target_image_size}")
+
    r = target_image_size / s
    s = (round(r * img.size[1]), round(r * img.size[0]))
    img = TF.resize(img, s, interpolation=PIL.Image.LANCZOS)

    img = torch.unsqueeze(T.ToTensor()(img), 0)
    return img

+
def preprocess_vqgan(x):
+     x = 2.0 * x - 1.0
+     return x
+

def custom_to_pil(x, process=True, mode="RGB"):
+     x = x.detach().cpu()
+     if process:
+         x = torch.clamp(x, -1.0, 1.0)
+         x = (x + 1.0) / 2.0
+     x = x.permute(1, 2, 0).numpy()
+     if process:
+         x = (255 * x).astype(np.uint8)
+     x = Image.fromarray(x)
+     if not x.mode == mode:
+         x = x.convert(mode)
+     return x
+

def get_pil(x):
+     x = torch.clamp(x, -1.0, 1.0)
+     x = (x + 1.0) / 2.0
+     x = x.permute(1, 2, 0)
+     return x
+

def loop_post_process(x):
+     x = get_pil(x.squeeze())
+     return x.permute(2, 0, 1).unsqueeze(0)
+

def stack_reconstructions(input, x0, x1, x2, x3, titles=[]):
+     assert input.size == x1.size == x2.size == x3.size
+     w, h = input.size[0], input.size[1]
+     img = Image.new("RGB", (5 * w, h))
+     img.paste(input, (0, 0))
+     img.paste(x0, (1 * w, 0))
+     img.paste(x1, (2 * w, 0))
+     img.paste(x2, (3 * w, 0))
+     img.paste(x3, (4 * w, 0))
+     for i, title in enumerate(titles):
+         ImageDraw.Draw(img).text(
+             (i * w, 0), f"{title}", (255, 255, 255), font=font
+         )  # coordinates, text, color, font
+     return img
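preprocess_vqgan and custom_to_pil above are the two ends of the [-1, 1] tensor convention used throughout the repo. A quick sanity-check sketch of that round trip; it assumes it is run from the repo root so img_processing is importable, and the random tensor is only a placeholder for a real image.

import torch

from img_processing import custom_to_pil, preprocess_vqgan

x = torch.rand(3, 64, 64)      # placeholder image tensor in [0, 1]
x = preprocess_vqgan(x)        # rescaled to [-1, 1]
pil = custom_to_pil(x)         # clamped, rescaled back, converted to a PIL image
print(pil.size, pil.mode)      # (64, 64) RGB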
loaders.py CHANGED
@@ -10,17 +10,17 @@ from utils import get_device


def load_config(config_path, display=False):
-     config = OmegaConf.load(config_path)
-     if display:
-         print(yaml.dump(OmegaConf.to_container(config)))
-     return config

def load_default(device):
-     ckpt_path = "logs/2021-04-23T18-11-19_celebahq_transformer/checkpoints/last.ckpt"
-     conf_path = "./unwrapped.yaml"
    config = load_config(conf_path, display=False)
    model = taming.models.vqgan.VQModel(**config.model.params)
-     sd = torch.load("./model_checkpoints/vqgan_only.pt", map_location=device)
    model.load_state_dict(sd, strict=True)
    model.to(device)
    del sd
@@ -34,17 +34,14 @@ def load_vqgan(config, ckpt_path=None, is_gumbel=False):
    missing, unexpected = model.load_state_dict(sd, strict=False)
    return model.eval()

- def load_ffhq():
-     conf = "2020-11-09T13-33-36_faceshq_vqgan/configs/2020-11-09T13-33-36-project.yaml"
-     ckpt = "2020-11-09T13-33-36_faceshq_vqgan/checkpoints/last.ckpt"
-     vqgan = load_model(load_config(conf), ckpt, True, True)[0]

def reconstruct_with_vqgan(x, model):
-     # could also use model(x) for reconstruction but use explicit encoding and decoding here
-     z, _, [_, _, indices] = model.encode(x)
-     print(f"VQGAN --- {model.__class__.__name__}: latent shape: {z.shape[2:]}")
-     xrec = model.decode(z)
-     return xrec
def get_obj_from_str(string, reload=False):
    module, cls = string.rsplit(".", 1)
    if reload:
@@ -52,12 +49,13 @@ def get_obj_from_str(string, reload=False):
        importlib.reload(module_imp)
    return getattr(importlib.import_module(module, package=None), cls)

- def instantiate_from_config(config):

-     if not "target" in config:
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))

def load_model_from_config(config, sd, gpu=True, eval_mode=True):
    model = instantiate_from_config(config)
    if sd is not None:
@@ -78,5 +76,7 @@ def load_model(config, ckpt, gpu, eval_mode):
    else:
        pl_sd = {"state_dict": None}
        global_step = None
-     model = load_model_from_config(config.model, pl_sd["state_dict"], gpu=gpu, eval_mode=eval_mode)["model"]
-     return model, global_step



def load_config(config_path, display=False):
+     config = OmegaConf.load(config_path)
+     if display:
+         print(yaml.dump(OmegaConf.to_container(config)))
+     return config
+

def load_default(device):
+     conf_path = "./celeba_vqgan/unwrapped.yaml"
    config = load_config(conf_path, display=False)
    model = taming.models.vqgan.VQModel(**config.model.params)
+     sd = torch.load("./celeba_vqgan/vqgan_only.pt", map_location=device)
    model.load_state_dict(sd, strict=True)
    model.to(device)
    del sd

    missing, unexpected = model.load_state_dict(sd, strict=False)
    return model.eval()


def reconstruct_with_vqgan(x, model):
+     z, _, [_, _, indices] = model.encode(x)
+     print(f"VQGAN --- {model.__class__.__name__}: latent shape: {z.shape[2:]}")
+     xrec = model.decode(z)
+     return xrec
+
+
def get_obj_from_str(string, reload=False):
    module, cls = string.rsplit(".", 1)
    if reload:

        importlib.reload(module_imp)
    return getattr(importlib.import_module(module, package=None), cls)


+ def instantiate_from_config(config):
+     if "target" not in config:
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))

+
def load_model_from_config(config, sd, gpu=True, eval_mode=True):
    model = instantiate_from_config(config)
    if sd is not None:

    else:
        pl_sd = {"state_dict": None}
        global_step = None
+     model = load_model_from_config(
+         config.model, pl_sd["state_dict"], gpu=gpu, eval_mode=eval_mode
+     )["model"]
+     return model, global_step
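get_obj_from_str and instantiate_from_config above implement the usual config-driven instantiation pattern: a dotted "target" string is resolved to a class, which is then constructed with "params". A standalone sketch of that pattern in simplified form (no reload handling); datetime.timedelta is used only so the example runs without taming-transformers installed.

import importlib

def get_obj_from_str(string):
    # Simplified version of loaders.get_obj_from_str.
    module, cls = string.rsplit(".", 1)
    return getattr(importlib.import_module(module), cls)

def instantiate_from_config(config):
    # Same contract as loaders.instantiate_from_config.
    if "target" not in config:
        raise KeyError("Expected key `target` to instantiate.")
    return get_obj_from_str(config["target"])(**config.get("params", dict()))

config = {"target": "datetime.timedelta", "params": {"days": 1, "hours": 6}}
print(instantiate_from_config(config))  # 1 day, 6:00:00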
masking.py CHANGED
@@ -3,30 +3,28 @@ import sys

import matplotlib.pyplot as plt
import torch

- sys.path.append("taming-transformers")
- import functools

- import gradio as gr
- from transformers import CLIPModel, CLIPProcessor

- import edit
- # import importlib
- # importlib.reload(edit)
- from backend import ImagePromptOptimizer, ImageState, ProcessorGradientFlow
- from loaders import load_default

- device = "cuda"
- vqgan = load_default(device)
- vqgan.eval()
- processor = ProcessorGradientFlow(device=device)
- clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
- clip.to(device)
- promptoptim = ImagePromptOptimizer(vqgan, clip, processor, quantize=True)
- state = ImageState(vqgan, promptoptim)
- mask = torch.load("eyebrow_mask.pt")
- x = state.blend("./test_data/face.jpeg", "./test_data/face2.jpeg", 0.5)
- plt.imshow(x)
- plt.show()
- state.apply_prompts("a picture of a woman with big eyebrows", "", 0.009, 40, None, mask=mask)
- print('done')


import matplotlib.pyplot as plt
import torch
+ from backend import ImagePromptEditor, ImageState, ProcessorGradientFlow
+ from loaders import load_default
+ from transformers import CLIPModel

+ if __name__ == "__main__":
+     sys.path.append("taming-transformers")
+     device = "cuda"

+     vqgan = load_default(device)
+     vqgan.eval()

+     processor = ProcessorGradientFlow(device=device)
+     clip = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+     clip.to(device)

+     promptoptim = ImagePromptEditor(vqgan, clip, processor, quantize=True)
+     state = ImageState(vqgan, promptoptim)
+     mask = torch.load("eyebrow_mask.pt")
+     x = state.blend("./test_data/face.jpeg", "./test_data/face2.jpeg", 0.5)
+     plt.imshow(x)
+     plt.show()
+     state.apply_prompts(
+         "a picture of a woman with big eyebrows", "", 0.009, 40, None, mask=mask
+     )
+     print("done")
presets.py CHANGED
@@ -1,16 +1,42 @@
import gradio as gr

def set_preset(config_str):
-     choices=["Small Masked Changes (e.g. add lipstick)", "Major Masked Changes (e.g. change hair color or nose size)", "Major Global Changes (e.g. change race / gender"]
    if config_str == choices[0]:
        return set_small_local()
    elif config_str == choices[1]:
        return set_major_local()
    elif config_str == choices[2]:
        return set_major_global()
def set_small_local():
-     return (gr.Slider.update(value=25), gr.Slider.update(value=0.15), gr.Slider.update(value=1), gr.Slider.update(value=4))
def set_major_local():
-     return (gr.Slider.update(value=25), gr.Slider.update(value=0.25), gr.Slider.update(value=35), gr.Slider.update(value=10))
def set_major_global():
-     return (gr.Slider.update(value=30), gr.Slider.update(value=0.1), gr.Slider.update(value=2), gr.Slider.update(value=0.2))

import gradio as gr

+
def set_preset(config_str):
+     choices = [
+         "Small Masked Changes (e.g. add lipstick)",
+         "Major Masked Changes (e.g. change hair color or nose size)",
+         "Major Global Changes (e.g. change race / gender",
+     ]
    if config_str == choices[0]:
        return set_small_local()
    elif config_str == choices[1]:
        return set_major_local()
    elif config_str == choices[2]:
        return set_major_global()
+
+
def set_small_local():
+     return (
+         gr.Slider.update(value=25),
+         gr.Slider.update(value=0.15),
+         gr.Slider.update(value=1),
+         gr.Slider.update(value=4),
+     )
+
+
def set_major_local():
+     return (
+         gr.Slider.update(value=25),
+         gr.Slider.update(value=0.25),
+         gr.Slider.update(value=35),
+         gr.Slider.update(value=10),
+     )
+
+
def set_major_global():
+     return (
+         gr.Slider.update(value=30),
+         gr.Slider.update(value=0.1),
+         gr.Slider.update(value=2),
+         gr.Slider.update(value=0.2),
+     )
prompts.py CHANGED
@@ -1,17 +1,41 @@
import random

class PromptSet:
    def __init__(self, pos, neg, config=None):
        self.positive = pos
        self.negative = neg
        self.config = config

example_prompts = (
-     PromptSet("a picture of a woman with light blonde hair", "a picture of a person with dark hair | a picture of a person with brown hair"),
-     PromptSet("A picture of a woman with very thick eyebrows", "a picture of a person with very thin eyebrows | a picture of a person with no eyebrows"),
-     PromptSet("A picture of a woman wearing bright red lipstick", "a picture of a person wearing no lipstick | a picture of a person wearing dark lipstick"),
-     PromptSet("A picture of a beautiful chinese woman | a picture of a Japanese woman | a picture of an Asian woman", "a picture of a white woman | a picture of an Indian woman | a picture of a black woman"),
-     PromptSet("A picture of a handsome man | a picture of a masculine man", "a picture of a woman | a picture of a feminine person"),
-     PromptSet("A picture of a woman with a very big nose", "a picture of a person with a small nose | a picture of a person with a normal nose"),
)

def get_random_prompts():
    prompt = random.choice(example_prompts)
-     return prompt.positive, prompt.negative

import random
+
+
class PromptSet:
    def __init__(self, pos, neg, config=None):
        self.positive = pos
        self.negative = neg
        self.config = config
+
+
example_prompts = (
+     PromptSet(
+         "a picture of a woman with light blonde hair",
+         "a picture of a person with dark hair | a picture of a person with brown hair",
+     ),
+     PromptSet(
+         "A picture of a woman with very thick eyebrows",
+         "a picture of a person with very thin eyebrows | a picture of a person with no eyebrows",
+     ),
+     PromptSet(
+         "A picture of a woman wearing bright red lipstick",
+         "a picture of a person wearing no lipstick | a picture of a person wearing dark lipstick",
+     ),
+     PromptSet(
+         "A picture of a beautiful chinese woman | a picture of a Japanese woman | a picture of an Asian woman",
+         "a picture of a white woman | a picture of an Indian woman | a picture of a black woman",
+     ),
+     PromptSet(
+         "A picture of a handsome man | a picture of a masculine man",
+         "a picture of a woman | a picture of a feminine person",
+     ),
+     PromptSet(
+         "A picture of a woman with a very big nose",
+         "a picture of a person with a small nose | a picture of a person with a normal nose",
+     ),
)
+
+
def get_random_prompts():
    prompt = random.choice(example_prompts)
+     return prompt.positive, prompt.negative
unwrapped.yaml DELETED
@@ -1,37 +0,0 @@
- model:
-   target: taming.models.vqgan.VQModel
-   params:
-     embed_dim: 256
-     n_embed: 1024
-     ddconfig:
-       double_z: false
-       z_channels: 256
-       resolution: 256
-       in_channels: 3
-       out_ch: 3
-       ch: 128
-       ch_mult:
-       - 1
-       - 1
-       - 2
-       - 2
-       - 4
-       num_res_blocks: 2
-       attn_resolutions:
-       - 16
-       dropout: 0.0
-     lossconfig:
-       target: taming.modules.losses.vqperceptual.DummyLoss
- data:
-   target: cutlit.DataModuleFromConfig
-   params:
-     batch_size: 24
-     num_workers: 24
-     train:
-       target: taming.data.faceshq.CelebAHQTrain
-       params:
-         size: 256
-     validation:
-       target: taming.data.faceshq.CelebAHQValidation
-       params:
-         size: 256
utils.py CHANGED
@@ -7,9 +7,11 @@ import torch.nn.functional as F
from skimage.color import lab2rgb, rgb2lab
from torch import nn

def freeze_module(module):
    for param in module.parameters():
-         param.requires_grad = False

def get_device():
    device = "cuda" if torch.cuda.is_available() else "cpu"

from skimage.color import lab2rgb, rgb2lab
from torch import nn

+
def freeze_module(module):
    for param in module.parameters():
+         param.requires_grad = False
+

def get_device():
    device = "cuda" if torch.cuda.is_available() else "cpu"