Spaces:

radames
/

UserControllableLT-Latent-Transformer

Runtime error

App Files Community

radames committed on May 23, 2023

Commit

8a8b805

•

1 Parent(s): 67eaa47

add state for initial position

Browse files

Files changed (2) hide show

interface/app.py +20 -14
interface/model_loader.py +240 -240

interface/app.py CHANGED Viewed

@@ -28,13 +28,13 @@ def random_sample(model_name: str):
     return pil_img, model_name, latents
-def zoom(model_state, latents_state, dx=0, dy=0, dz=0):
     model = models[model_state]
     dx = dx
     dy = dy
     dz = dz
-    sx = 100
-    sy = 100
     stop_points = []
     img, latents_state = model.zoom(
         latents_state, dz, sxsy=[sx, sy], stop_points=stop_points
@@ -43,14 +43,14 @@ def zoom(model_state, latents_state, dx=0, dy=0, dz=0):
     return pil_img, latents_state
-def translate(model_state, latents_state, dx=0, dy=0, dz=0):
     model = models[model_state]
     dx = dx
     dy = dy
     dz = dz
-    sx = 128
-    sy = 128
     stop_points = []
     zi = False
     zo = False
@@ -82,9 +82,15 @@ def reset(model_state, latents_state):
     return pil_img, latents_state
 with gr.Blocks() as block:
     model_state = gr.State(value="cat")
     latents_state = gr.State({})
     gr.Markdown("# UserControllableLT: User controllable latent transformer")
     gr.Markdown("## Select model")
     with gr.Row():
@@ -99,13 +105,13 @@ with gr.Blocks() as block:
                 reset_btn = gr.Button("Reset")
             dx = gr.Slider(
-                minimum=-128, maximum=128, step_size=0.1, label="dx", value=0.0
             )
             dy = gr.Slider(
-                minimum=-128, maximum=128, step_size=0.1, label="dy", value=0.0
             )
             dz = gr.Slider(
-                minimum=-128, maximum=128, step_size=0.1, label="dz", value=0.0
             )
             with gr.Row():
@@ -113,10 +119,10 @@ with gr.Blocks() as block:
         with gr.Column():
             image = gr.Image(type="pil", label="")
     button.click(
         random_sample, inputs=[model_name], outputs=[image, model_state, latents_state]
     )
     reset_btn.click(
         reset,
         inputs=[model_state, latents_state],
@@ -130,22 +136,22 @@ with gr.Blocks() as block:
     )
     dx.change(
         translate,
-        inputs=[model_state, latents_state, dx, dy, dz],
         outputs=[image, latents_state],
         show_progress=False,
     )
     dy.change(
         translate,
-        inputs=[model_state, latents_state, dx, dy, dz],
         outputs=[image, latents_state],
         show_progress=False,
     )
     dz.change(
         zoom,
-        inputs=[model_state, latents_state, dx, dy, dz],
         outputs=[image, latents_state],
         show_progress=False,
     )
 block.launch()

     return pil_img, model_name, latents
+def zoom(model_state, latents_state, dx=0, dy=0, dz=0, sxsy=[128, 128]):
     model = models[model_state]
     dx = dx
     dy = dy
     dz = dz
+    sx = sxsy[0]
+    sy = sxsy[1]
     stop_points = []
     img, latents_state = model.zoom(
         latents_state, dz, sxsy=[sx, sy], stop_points=stop_points
     return pil_img, latents_state
+def translate(model_state, latents_state, dx=0, dy=0, dz=0, sxsy=[128, 128]):
     model = models[model_state]
     dx = dx
     dy = dy
     dz = dz
+    sx = sxsy[0]
+    sy = sxsy[1]
     stop_points = []
     zi = False
     zo = False
     return pil_img, latents_state
+def image_click(evt: gr.SelectData):
+    click_pos = evt.index
+    return click_pos
 with gr.Blocks() as block:
     model_state = gr.State(value="cat")
     latents_state = gr.State({})
+    sxsy = gr.State([128, 128])
     gr.Markdown("# UserControllableLT: User controllable latent transformer")
     gr.Markdown("## Select model")
     with gr.Row():
                 reset_btn = gr.Button("Reset")
             dx = gr.Slider(
+                minimum=-256, maximum=256, step_size=0.1, label="dx", value=0.0
             )
             dy = gr.Slider(
+                minimum=-256, maximum=256, step_size=0.1, label="dy", value=0.0
             )
             dz = gr.Slider(
+                minimum=-256, maximum=256, step_size=0.1, label="dz", value=0.0
             )
             with gr.Row():
         with gr.Column():
             image = gr.Image(type="pil", label="")
+    image.select(image_click, inputs=None, outputs=sxsy)
     button.click(
         random_sample, inputs=[model_name], outputs=[image, model_state, latents_state]
     )
     reset_btn.click(
         reset,
         inputs=[model_state, latents_state],
     )
     dx.change(
         translate,
+        inputs=[model_state, latents_state, dx, dy, dz, sxsy],
         outputs=[image, latents_state],
         show_progress=False,
     )
     dy.change(
         translate,
+        inputs=[model_state, latents_state, dx, dy, dz, sxsy],
         outputs=[image, latents_state],
         show_progress=False,
     )
     dz.change(
         zoom,
+        inputs=[model_state, latents_state, dx, dy, dz, sxsy],
         outputs=[image, latents_state],
         show_progress=False,
     )
+block.queue()
 block.launch()

interface/model_loader.py CHANGED Viewed

@@ -1,240 +1,240 @@
-import os
-from argparse import Namespace
-import numpy as np
-import torch
-from models.StyleGANControler import StyleGANControler
-class Model:
-    def __init__(
-        self, checkpoint_path, truncation=0.5, use_average_code_as_input=False
-    ):
-        self.truncation = truncation
-        self.use_average_code_as_input = use_average_code_as_input
-        ckpt = torch.load(checkpoint_path, map_location="cpu")
-        opts = ckpt["opts"]
-        opts["checkpoint_path"] = checkpoint_path
-        self.opts = Namespace(**ckpt["opts"])
-        self.net = StyleGANControler(self.opts)
-        self.net.eval()
-        self.net.cuda()
-        self.target_layers = [0, 1, 2, 3, 4, 5]
-    def random_sample(self):
-        z1 = torch.randn(1, 512).to("cuda")
-        x1, w1, f1 = self.net.decoder(
-            [z1],
-            input_is_latent=False,
-            randomize_noise=False,
-            return_feature_map=True,
-            return_latents=True,
-            truncation=self.truncation,
-            truncation_latent=self.net.latent_avg[0],
-        )
-        w1_initial = w1.clone()
-        x1 = self.net.face_pool(x1)
-        image = (
-            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
-        )
-        return (
-            image,
-            {
-                "w1": w1.cpu().detach().numpy(),
-                "w1_initial": w1_initial.cpu().detach().numpy(),
-            },
-        )  # return latent vector along with the image
-    def latents_to_tensor(self, latents):
-        w1 = latents["w1"]
-        w1_initial = latents["w1_initial"]
-        w1 = torch.tensor(w1).to("cuda")
-        w1_initial = torch.tensor(w1_initial).to("cuda")
-        x1, w1, f1 = self.net.decoder(
-            [w1],
-            input_is_latent=True,
-            randomize_noise=False,
-            return_feature_map=True,
-            return_latents=True,
-        )
-        x1, w1_initial, f1 = self.net.decoder(
-            [w1_initial],
-            input_is_latent=True,
-            randomize_noise=False,
-            return_feature_map=True,
-            return_latents=True,
-        )
-        return (w1, w1_initial, f1)
-    def zoom(self, latents, dz, sxsy=[0, 0], stop_points=[]):
-        w1, w1_initial, f1 = self.latents_to_tensor(latents)
-        w1 = w1_initial.clone()
-        vec_num = abs(dz) / 5
-        dz = 100 * np.sign(dz)
-        x = torch.from_numpy(np.array([[[1.0, 0, dz]]], dtype=np.float32)).cuda()
-        f1 = torch.nn.functional.interpolate(f1, (256, 256))
-        y = f1[:, :, sxsy[1], sxsy[0]].unsqueeze(0)
-        if len(stop_points) > 0:
-            x = torch.cat(
-                [x, torch.zeros(x.shape[0], len(stop_points), x.shape[2]).cuda()], dim=1
-            )
-            tmp = []
-            for sp in stop_points:
-                tmp.append(f1[:, :, sp[1], sp[0]].unsqueeze(1))
-            y = torch.cat([y, torch.cat(tmp, dim=1)], dim=1)
-        if not self.use_average_code_as_input:
-            w_hat = self.net.encoder(
-                w1[:, self.target_layers].detach(),
-                x.detach(),
-                y.detach(),
-                alpha=vec_num,
-            )
-            w1 = w1.clone()
-            w1[:, self.target_layers] = w_hat
-        else:
-            w_hat = self.net.encoder(
-                self.net.latent_avg.unsqueeze(0)[:, self.target_layers].detach(),
-                x.detach(),
-                y.detach(),
-                alpha=vec_num,
-            )
-            w1 = w1.clone()
-            w1[:, self.target_layers] = (
-                w1.clone()[:, self.target_layers]
-                + w_hat
-                - self.net.latent_avg.unsqueeze(0)[:, self.target_layers]
-            )
-        x1, _ = self.net.decoder([w1], input_is_latent=True, randomize_noise=False)
-        x1 = self.net.face_pool(x1)
-        result = (
-            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
-        )
-        return (
-            result,
-            {
-                "w1": w1.cpu().detach().numpy(),
-                "w1_initial": w1_initial.cpu().detach().numpy(),
-            },
-        )  # return latent vector along with the image
-    def translate(
-        self, latents, dxy, sxsy=[0, 0], stop_points=[], zoom_in=False, zoom_out=False
-    ):
-        w1, w1_initial, f1 = self.latents_to_tensor(latents)
-        w1 = w1_initial.clone()
-        dz = -5.0 if zoom_in else 0.0
-        dz = 5.0 if zoom_out else dz
-        dxyz = np.array([dxy[0], dxy[1], dz], dtype=np.float32)
-        dxy_norm = np.linalg.norm(dxyz[:2], ord=2)
-        dxyz[:2] = dxyz[:2] / dxy_norm
-        vec_num = dxy_norm / 10
-        x = torch.from_numpy(np.array([[dxyz]], dtype=np.float32)).cuda()
-        f1 = torch.nn.functional.interpolate(f1, (256, 256))
-        y = f1[:, :, sxsy[1], sxsy[0]].unsqueeze(0)
-        if len(stop_points) > 0:
-            x = torch.cat(
-                [x, torch.zeros(x.shape[0], len(stop_points), x.shape[2]).cuda()], dim=1
-            )
-            tmp = []
-            for sp in stop_points:
-                tmp.append(f1[:, :, sp[1], sp[0]].unsqueeze(1))
-            y = torch.cat([y, torch.cat(tmp, dim=1)], dim=1)
-        if not self.use_average_code_as_input:
-            w_hat = self.net.encoder(
-                w1[:, self.target_layers].detach(),
-                x.detach(),
-                y.detach(),
-                alpha=vec_num,
-            )
-            w1 = w1.clone()
-            w1[:, self.target_layers] = w_hat
-        else:
-            w_hat = self.net.encoder(
-                self.net.latent_avg.unsqueeze(0)[:, self.target_layers].detach(),
-                x.detach(),
-                y.detach(),
-                alpha=vec_num,
-            )
-            w1 = w1.clone()
-            w1[:, self.target_layers] = (
-                w1.clone()[:, self.target_layers]
-                + w_hat
-                - self.net.latent_avg.unsqueeze(0)[:, self.target_layers]
-            )
-        x1, _ = self.net.decoder([w1], input_is_latent=True, randomize_noise=False)
-        x1 = self.net.face_pool(x1)
-        result = (
-            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
-        )
-        return (
-            result,
-            {
-                "w1": w1.cpu().detach().numpy(),
-                "w1_initial": w1_initial.cpu().detach().numpy(),
-            },
-        )
-    def change_style(self, latents):
-        w1, w1_initial, f1 = self.latents_to_tensor(latents)
-        w1 = w1_initial.clone()
-        z1 = torch.randn(1, 512).to("cuda")
-        x1, w2 = self.net.decoder(
-            [z1],
-            input_is_latent=False,
-            randomize_noise=False,
-            return_latents=True,
-            truncation=self.truncation,
-            truncation_latent=self.net.latent_avg[0],
-        )
-        w1[:, 6:] = w2.detach()[:, 0]
-        x1, w1_new = self.net.decoder(
-            [w1],
-            input_is_latent=True,
-            randomize_noise=False,
-            return_latents=True,
-        )
-        result = (
-            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
-        )
-        return (
-            result,
-            {
-                "w1": w1_new.cpu().detach().numpy(),
-                "w1_initial": w1_new.cpu().detach().numpy(),
-            },
-        )
-    def reset(self, latents):
-        w1, w1_initial, f1 = self.latents_to_tensor(latents)
-        x1, w1_new, f1 = self.net.decoder(
-            [w1_initial],
-            input_is_latent=True,
-            randomize_noise=False,
-            return_feature_map=True,
-            return_latents=True,
-        )
-        result = (
-            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
-        )
-        return (
-            result,
-            {
-                "w1": w1_new.cpu().detach().numpy(),
-                "w1_initial": w1_new.cpu().detach().numpy(),
-            },
-        )

+import os
+from argparse import Namespace
+import numpy as np
+import torch
+from models.StyleGANControler import StyleGANControler
+class Model:
+    def __init__(
+        self, checkpoint_path, truncation=0.5, use_average_code_as_input=False
+    ):
+        self.truncation = truncation
+        self.use_average_code_as_input = use_average_code_as_input
+        ckpt = torch.load(checkpoint_path, map_location="cpu")
+        opts = ckpt["opts"]
+        opts["checkpoint_path"] = checkpoint_path
+        self.opts = Namespace(**ckpt["opts"])
+        self.net = StyleGANControler(self.opts)
+        self.net.eval()
+        self.net.cuda()
+        self.target_layers = [0, 1, 2, 3, 4, 5]
+    def random_sample(self):
+        z1 = torch.randn(1, 512).to("cuda")
+        x1, w1, f1 = self.net.decoder(
+            [z1],
+            input_is_latent=False,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+            truncation=self.truncation,
+            truncation_latent=self.net.latent_avg[0],
+        )
+        w1_initial = w1.clone()
+        x1 = self.net.face_pool(x1)
+        image = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            image,
+            {
+                "w1": w1.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )  # return latent vector along with the image
+    def latents_to_tensor(self, latents):
+        w1 = latents["w1"]
+        w1_initial = latents["w1_initial"]
+        w1 = torch.tensor(w1).to("cuda")
+        w1_initial = torch.tensor(w1_initial).to("cuda")
+        x1, w1, f1 = self.net.decoder(
+            [w1],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+        )
+        x1, w1_initial, f1 = self.net.decoder(
+            [w1_initial],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+        )
+        return (w1, w1_initial, f1)
+    def zoom(self, latents, dz, sxsy=[0, 0], stop_points=[]):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        w1 = w1_initial.clone()
+        vec_num = abs(dz) / 5
+        dz = 100 * np.sign(dz)
+        x = torch.from_numpy(np.array([[[1.0, 0, dz]]], dtype=np.float32)).cuda()
+        f1 = torch.nn.functional.interpolate(f1, (256, 256))
+        y = f1[:, :, sxsy[1], sxsy[0]].unsqueeze(0)
+        if len(stop_points) > 0:
+            x = torch.cat(
+                [x, torch.zeros(x.shape[0], len(stop_points), x.shape[2]).cuda()], dim=1
+            )
+            tmp = []
+            for sp in stop_points:
+                tmp.append(f1[:, :, sp[1], sp[0]].unsqueeze(1))
+            y = torch.cat([y, torch.cat(tmp, dim=1)], dim=1)
+        if not self.use_average_code_as_input:
+            w_hat = self.net.encoder(
+                w1[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = w_hat
+        else:
+            w_hat = self.net.encoder(
+                self.net.latent_avg.unsqueeze(0)[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = (
+                w1.clone()[:, self.target_layers]
+                + w_hat
+                - self.net.latent_avg.unsqueeze(0)[:, self.target_layers]
+            )
+        x1, _ = self.net.decoder([w1], input_is_latent=True, randomize_noise=False)
+        x1 = self.net.face_pool(x1)
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )  # return latent vector along with the image
+    def translate(
+        self, latents, dxy, sxsy=[0, 0], stop_points=[], zoom_in=False, zoom_out=False
+    ):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        w1 = w1_initial.clone()
+        dz = -5.0 if zoom_in else 0.0
+        dz = 5.0 if zoom_out else dz
+        dxyz = np.array([dxy[0], dxy[1], dz], dtype=np.float32)
+        dxy_norm = np.linalg.norm(dxyz[:2], ord=2)
+        dxyz[:2] = dxyz[:2] / dxy_norm
+        vec_num = dxy_norm / 10
+        x = torch.from_numpy(np.array([[dxyz]], dtype=np.float32)).cuda()
+        f1 = torch.nn.functional.interpolate(f1, (256, 256))
+        y = f1[:, :, sxsy[1], sxsy[0]].unsqueeze(0)
+        if len(stop_points) > 0:
+            x = torch.cat(
+                [x, torch.zeros(x.shape[0], len(stop_points), x.shape[2]).cuda()], dim=1
+            )
+            tmp = []
+            for sp in stop_points:
+                tmp.append(f1[:, :, sp[1], sp[0]].unsqueeze(1))
+            y = torch.cat([y, torch.cat(tmp, dim=1)], dim=1)
+        if not self.use_average_code_as_input:
+            w_hat = self.net.encoder(
+                w1[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = w_hat
+        else:
+            w_hat = self.net.encoder(
+                self.net.latent_avg.unsqueeze(0)[:, self.target_layers].detach(),
+                x.detach(),
+                y.detach(),
+                alpha=vec_num,
+            )
+            w1 = w1.clone()
+            w1[:, self.target_layers] = (
+                w1.clone()[:, self.target_layers]
+                + w_hat
+                - self.net.latent_avg.unsqueeze(0)[:, self.target_layers]
+            )
+        x1, _ = self.net.decoder([w1], input_is_latent=True, randomize_noise=False)
+        x1 = self.net.face_pool(x1)
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1.cpu().detach().numpy(),
+                "w1_initial": w1_initial.cpu().detach().numpy(),
+            },
+        )
+    def change_style(self, latents):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        w1 = w1_initial.clone()
+        z1 = torch.randn(1, 512).to("cuda")
+        x1, w2 = self.net.decoder(
+            [z1],
+            input_is_latent=False,
+            randomize_noise=False,
+            return_latents=True,
+            truncation=self.truncation,
+            truncation_latent=self.net.latent_avg[0],
+        )
+        w1[:, 6:] = w2.detach()[:, 0]
+        x1, w1_new = self.net.decoder(
+            [w1],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_latents=True,
+        )
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1_new.cpu().detach().numpy(),
+                "w1_initial": w1_new.cpu().detach().numpy(),
+            },
+        )
+    def reset(self, latents):
+        w1, w1_initial, f1 = self.latents_to_tensor(latents)
+        x1, w1_new, f1 = self.net.decoder(
+            [w1_initial],
+            input_is_latent=True,
+            randomize_noise=False,
+            return_feature_map=True,
+            return_latents=True,
+        )
+        result = (
+            ((x1.detach()[0].permute(1, 2, 0) + 1.0) * 127.5).cpu().numpy()[:, :, ::-1]
+        )
+        return (
+            result,
+            {
+                "w1": w1_new.cpu().detach().numpy(),
+                "w1_initial": w1_new.cpu().detach().numpy(),
+            },
+        )