erwann committed
Commit 2783b1f
1 Parent(s): 29bbf75

update gradio demo

Files changed (4):
  1. README.md +9 -0
  2. app.py +0 -6
  3. backend.py +20 -28
  4. presets.py +1 -0
README.md CHANGED
@@ -9,4 +9,13 @@ app_file: app.py
 pinned: false
 ---
 
+# Face Editor
+This face editor uses a CelebA-pretrained VQGAN with CLIP to allow prompt-based image manipulation, as well as slider-based manipulation using extracted latent vectors.
+
+I've written a series of Medium articles which provide a detailed and beginner-friendly explanation of how this was built.
+
+## Features:
+Edit masking using a custom backpropagation hook
+
+
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
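The slider-based editing mentioned in the README amounts to adding a scaled, precomputed direction vector to the image's latent code before decoding. Below is a minimal sketch of that idea only; the `apply_direction` helper, the `direction` tensor, and the commented `decode` call are illustrative assumptions, not the repo's actual API.

```python
import torch

def apply_direction(latent: torch.Tensor,
                    direction: torch.Tensor,
                    weight: float) -> torch.Tensor:
    """Shift a VQGAN latent along an extracted attribute direction.

    `direction` is assumed to be a precomputed latent vector (e.g. the
    difference between mean latents of two attribute groups); `weight`
    is the value of a slider exposed in the Gradio UI.
    """
    return latent + weight * direction

# Hypothetical usage: decode(...) stands in for the VQGAN decoder.
# edited_image = decode(apply_direction(latent, lip_size_direction, 0.8))
```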
app.py CHANGED
@@ -183,17 +183,11 @@ with gr.Blocks(css="styles.css") as demo:
                                      value=3,
                                      step=1,
                                      label="Steps to run at the end of the optimization, optimizing only the masked perceptual loss. If the edit is changing the identity too much, this setting will run steps at the end that 'pull' the image back towards the original identity")
-    # discriminator_steps = gr.Slider(minimum=0,
-    #                                 maximum=50,
-    #                                 step=1,
-    #                                 value=0,
-    #                                 label="Steps to run at the end, optimizing only the discriminator loss. This helps to reduce artefacts, but because the model is trained on CelebA, this will make your generations look more like generic white celebrities")
     clear.click(StateWrapper.clear_transforms, inputs=[state], outputs=[state, out, mask])
     asian_weight.change(StateWrapper.apply_asian_vector, inputs=[state, asian_weight], outputs=[state, out, mask])
     lip_size.change(StateWrapper.apply_lip_vector, inputs=[state, lip_size], outputs=[state, out, mask])
     blue_eyes.change(StateWrapper.apply_rb_vector, inputs=[state, blue_eyes], outputs=[state, out, mask])
     blend_weight.change(StateWrapper.blend, inputs=[state, blend_weight], outputs=[state, out, mask])
-    # requantize.change(StateWrapper.update_requant, inputs=[state, requantize], outputs=[state, out, mask])
     base_img.change(StateWrapper.update_images, inputs=[state, base_img, blend_img, blend_weight], outputs=[state, out, mask])
     blend_img.change(StateWrapper.update_images, inputs=[state, base_img, blend_img, blend_weight], outputs=[state, out, mask])
     apply_prompts.click(StateWrapper.apply_prompts, inputs=[state, positive_prompts, negative_prompts, learning_rate, iterations, lpips_weight, reconstruction_steps], outputs=[state, out, mask])
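The event wiring above follows the standard Gradio pattern: every `.change()` or `.click()` callback receives the session state plus the control values and returns the updated state together with the refreshed outputs. A self-contained sketch of that pattern, assuming a placeholder `apply_slider` callback rather than the repo's `StateWrapper` methods:

```python
import gradio as gr

def apply_slider(state, weight):
    # Placeholder for something like StateWrapper.apply_lip_vector:
    # update the stored edit parameters and re-render the output.
    state["weight"] = weight
    return state, f"rendered with weight={weight}"

with gr.Blocks() as demo:
    state = gr.State({})
    slider = gr.Slider(minimum=-1.0, maximum=1.0, value=0.0, label="Edit strength")
    out = gr.Textbox(label="Output")
    # Each UI event passes the session state in and gets the updated state back,
    # the same pattern used for clear.click / *.change in the diff above.
    slider.change(apply_slider, inputs=[state, slider], outputs=[state, out])

# demo.launch()
```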
backend.py CHANGED
@@ -79,7 +79,7 @@ class ImagePromptEditor(nn.Module):
         self.latent = latent.detach().to(self.device)
 
     def set_params(self, lr, iterations, lpips_weight, reconstruction_steps, attn_mask):
-        self._attn_mask = attn_mask
+        self.attn_mask = attn_mask
         self.iterations = iterations
         self.lr = lr
         self.lpips_weight = lpips_weight
@@ -118,25 +118,16 @@ class ImagePromptEditor(nn.Module):
         loss = -torch.log(pos_logits) + torch.log(neg_logits)
         return loss
 
-    def visualize(self, processed_img):
-        if self.make_grid:
-            self.index += 1
-            plt.subplot(1, 13, self.index)
-            plt.imshow(get_pil(processed_img[0]).detach().cpu())
-        else:
-            plt.imshow(get_pil(processed_img[0]).detach().cpu())
-        plt.show()
-
-    def _attn_mask(self, grad):
+    def _apply_mask(self, grad):
         newgrad = grad
-        if self._attn_mask is not None:
-            newgrad = grad * (self._attn_mask)
+        if self.attn_mask is not None:
+            newgrad = grad * (self.attn_mask)
         return newgrad
 
-    def _attn_mask_inverse(self, grad):
+    def _apply_inverse_mask(self, grad):
         newgrad = grad
-        if self._attn_mask is not None:
-            newgrad = grad * ((self._attn_mask - 1) * -1)
+        if self.attn_mask is not None:
+            newgrad = grad * ((self.attn_mask - 1) * -1)
         return newgrad
 
     def _get_next_inputs(self, transformed_img):
@@ -144,11 +135,11 @@ class ImagePromptEditor(nn.Module):
         processed_img.retain_grad()
 
         lpips_input = processed_img.clone()
-        lpips_input.register_hook(self._attn_mask_inverse)
+        lpips_input.register_hook(self._apply_inverse_mask)
         lpips_input.retain_grad()
 
         clip_input = processed_img.clone()
-        clip_input.register_hook(self._attn_mask)
+        clip_input.register_hook(self._apply_mask)
         clip_input.retain_grad()
 
         return (processed_img, lpips_input, clip_input)
@@ -160,15 +151,15 @@ class ImagePromptEditor(nn.Module):
         processed_img, lpips_input, clip_input = self._get_next_inputs(
             transformed_img
         )
-        with torch.autocast("cuda"):
-            clip_loss = self._get_CLIP_loss(pos_prompts, neg_prompts, clip_input)
-            print("CLIP loss", clip_loss)
-            perceptual_loss = (
-                self.perceptual_loss(lpips_input, original_img.clone())
-                * self.lpips_weight
-            )
-            print("LPIPS loss: ", perceptual_loss)
-            print("Sum Loss", perceptual_loss + clip_loss)
+        # with torch.autocast("cuda"):
+        clip_loss = self._get_CLIP_loss(pos_prompts, neg_prompts, clip_input)
+        print("CLIP loss", clip_loss)
+        perceptual_loss = (
+            self.perceptual_loss(lpips_input, original_img.clone())
+            * self.lpips_weight
+        )
+        print("LPIPS loss: ", perceptual_loss)
+        print("Sum Loss", perceptual_loss + clip_loss)
         if log:
             wandb.log({"Perceptual Loss": perceptual_loss})
             wandb.log({"CLIP Loss": clip_loss})
@@ -188,7 +179,7 @@ class ImagePromptEditor(nn.Module):
         processed_img.retain_grad()
 
         lpips_input = processed_img.clone()
-        lpips_input.register_hook(self._attn_mask_inverse)
+        lpips_input.register_hook(self._apply_inverse_mask)
         lpips_input.retain_grad()
         with torch.autocast("cuda"):
             perceptual_loss = (
@@ -217,4 +208,5 @@ class ImagePromptEditor(nn.Module):
         print("Running LPIPS optim only")
         for transform in self._optimize_LPIPS(vector, original_img, optim):
             yield transform
+
         yield vector if self.return_val == "vector" else self.latent + vector
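The renamed `_apply_mask` / `_apply_inverse_mask` hooks implement the "edit masking" feature from the README: the CLIP branch only receives gradients inside the mask, while the LPIPS branch receives the complement, so prompt-driven changes are confined to the masked region. A standalone sketch of the same hook math, with the `make_mask_hook` wrapper and the toy tensor shapes being my own illustrative assumptions:

```python
import torch

def make_mask_hook(mask: torch.Tensor, invert: bool = False):
    """Return a backward hook that zeroes gradients outside (or inside) a mask."""
    def hook(grad):
        if mask is None:
            return grad
        m = (1 - mask) if invert else mask   # (1 - mask) == ((mask - 1) * -1)
        return grad * m
    return hook

# Toy example with illustrative shapes:
img = torch.rand(1, 3, 8, 8, requires_grad=True)
mask = torch.zeros(1, 3, 8, 8)
mask[..., :4] = 1.0                                   # edit only the top half

clip_branch = img.clone()
clip_branch.register_hook(make_mask_hook(mask))                 # gradients kept inside the mask
lpips_branch = img.clone()
lpips_branch.register_hook(make_mask_hook(mask, invert=True))   # gradients kept outside the mask

(clip_branch.sum() + lpips_branch.sum()).backward()
# img.grad is now 1 everywhere: mask + (1 - mask)
```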
presets.py CHANGED
@@ -2,6 +2,7 @@ import gradio as gr
 
 
 def set_preset(config_str):
+    print(config_str)
     choices = [
         "Small Masked Changes (e.g. add lipstick)",
         "Major Masked Changes (e.g. change hair color or nose size)",