radames (HF staff) committed
Commit e722a6c • 1 Parent(s): 1294e7a

merge with main

Files changed (3)
  1. viz/capture_widget.py +3 -2
  2. viz/drag_widget.py +1 -0
  3. viz/renderer.py +84 -108
viz/capture_widget.py CHANGED
@@ -31,7 +31,7 @@ class CaptureWidget:
         viz = self.viz
         try:
             _height, _width, channels = image.shape
-            assert channels in [1, 3]
+            print(viz.result)
             assert image.dtype == np.uint8
             os.makedirs(self.path, exist_ok=True)
             file_id = 0
@@ -43,8 +43,9 @@ class CaptureWidget:
             if channels == 1:
                 pil_image = PIL.Image.fromarray(image[:, :, 0], 'L')
             else:
-                pil_image = PIL.Image.fromarray(image, 'RGB')
+                pil_image = PIL.Image.fromarray(image[:, :, :3], 'RGB')
             pil_image.save(os.path.join(self.path, f'{file_id:05d}.png'))
+            np.save(os.path.join(self.path, f'{file_id:05d}.npy'), viz.result.w)
         except:
             viz.result.error = renderer.CapturedException()
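Net effect: each capture now writes a matched pair of files — the frame as a PNG (sliced to `image[:, :, :3]` so an RGBA buffer no longer trips the removed channel assertion) and the current latent `viz.result.w` as a `.npy`. A minimal sketch (not part of the commit) of reloading such a pair; the directory name 'captures' and file id 00000 are placeholders:

import os
import numpy as np
import PIL.Image

path = 'captures'  # placeholder for the widget's self.path
frame = np.asarray(PIL.Image.open(os.path.join(path, '00000.png')))  # uint8, H x W x C
w = np.load(os.path.join(path, '00000.npy'))                         # latent saved from viz.result.w
print(frame.shape, frame.dtype, w.shape)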
viz/drag_widget.py CHANGED
@@ -90,6 +90,7 @@ class DragWidget:
     @imgui_utils.scoped_by_object_id
     def __call__(self, show=True):
         viz = self.viz
+        reset = False
         if show:
             with imgui_utils.grayed_out(self.disabled_time != 0):
                 imgui.text('Drag')
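The single added line matters because `reset` is presumably read later in `__call__` regardless of which UI branch runs; without an unconditional default, any path that never assigns it would raise `UnboundLocalError`. A toy illustration of the failure mode (hypothetical function, not the widget code):

def handler(show=True):
    if show:
        reset = True
    return reset  # UnboundLocalError if show is False

def handler_fixed(show=True):
    reset = False  # unconditional default, as in the drag widget change
    if show:
        reset = True
    return reset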
viz/renderer.py CHANGED
@@ -20,10 +20,9 @@ import torch.nn.functional as F
 import matplotlib.cm
 import dnnlib
 from torch_utils.ops import upfirdn2d
-import legacy  # pylint: disable=import-error
-
-# ----------------------------------------------------------------------------
+import legacy # pylint: disable=import-error
 
+#----------------------------------------------------------------------------
 
 class CapturedException(Exception):
     def __init__(self, msg=None):
@@ -37,16 +36,14 @@ class CapturedException(Exception):
         assert isinstance(msg, str)
         super().__init__(msg)
 
-# ----------------------------------------------------------------------------
-
+#----------------------------------------------------------------------------
 
 class CaptureSuccess(Exception):
     def __init__(self, out):
         super().__init__()
         self.out = out
 
-# ----------------------------------------------------------------------------
-
+#----------------------------------------------------------------------------
 
 def add_watermark_np(input_image_array, watermark_text="AI Generated"):
     image = Image.fromarray(np.uint8(input_image_array)).convert("RGBA")
@@ -57,10 +54,8 @@ def add_watermark_np(input_image_array, watermark_text="AI Generated"):
     d = ImageDraw.Draw(txt)
 
     text_width, text_height = font.getsize(watermark_text)
-    text_position = (image.size[0] - text_width -
-                     10, image.size[1] - text_height - 10)
-    # white color with the alpha channel set to semi-transparent
-    text_color = (255, 255, 255, 128)
+    text_position = (image.size[0] - text_width - 10, image.size[1] - text_height - 10)
+    text_color = (255, 255, 255, 128) # white color with the alpha channel set to semi-transparent
 
     # Draw the text onto the text canvas
     d.text(text_position, watermark_text, font=font, fill=text_color)
@@ -70,22 +65,22 @@ def add_watermark_np(input_image_array, watermark_text="AI Generated"):
     watermarked_array = np.array(watermarked)
     return watermarked_array
 
-# ----------------------------------------------------------------------------
-
+#----------------------------------------------------------------------------
 
 class Renderer:
     def __init__(self, disable_timing=False):
-        self._device = torch.device('cuda')
-        self._pkl_data = dict()  # {pkl: dict | CapturedException, ...}
-        self._networks = dict()  # {cache_key: torch.nn.Module, ...}
-        self._pinned_bufs = dict()  # {(shape, dtype): torch.Tensor, ...}
-        self._cmaps = dict()  # {name: torch.Tensor, ...}
-        self._is_timing = False
+        self._device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
+        self._dtype = torch.float32 if self._device.type == 'mps' else torch.float64
+        self._pkl_data = dict() # {pkl: dict | CapturedException, ...}
+        self._networks = dict() # {cache_key: torch.nn.Module, ...}
+        self._pinned_bufs = dict() # {(shape, dtype): torch.Tensor, ...}
+        self._cmaps = dict() # {name: torch.Tensor, ...}
+        self._is_timing = False
         if not disable_timing:
-            self._start_event = torch.cuda.Event(enable_timing=True)
-            self._end_event = torch.cuda.Event(enable_timing=True)
+            self._start_event = torch.cuda.Event(enable_timing=True)
+            self._end_event = torch.cuda.Event(enable_timing=True)
         self._disable_timing = disable_timing
-        self._net_layers = dict()  # {cache_key: [dnnlib.EasyDict, ...], ...}
+        self._net_layers = dict() # {cache_key: [dnnlib.EasyDict, ...], ...}
 
     def render(self, **args):
         if self._disable_timing:
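The key functional change in `__init__` is device selection: instead of hard-coding CUDA, the renderer now falls back to Apple's MPS backend and then to CPU, with `float32` on MPS since that backend has no `float64` support. A standalone sketch of the same pattern:

import torch

# Prefer CUDA, then Apple Metal (MPS), then CPU; MPS cannot host float64 tensors.
device = torch.device('cuda' if torch.cuda.is_available()
                      else 'mps' if torch.backends.mps.is_available()
                      else 'cpu')
dtype = torch.float32 if device.type == 'mps' else torch.float64
x = torch.randn(1, 512, device=device, dtype=dtype)
print(device, x.dtype)

Note that the timing events a few lines down are still `torch.cuda.Event`s, so on MPS or CPU the caller presumably constructs the renderer with `disable_timing=True`.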
@@ -101,6 +96,9 @@ class Renderer:
         if hasattr(self, 'pkl'):
             if self.pkl != args['pkl']:
                 init_net = True
+        if hasattr(self, 'w_load'):
+            if self.w_load is not args['w_load']:
+                init_net = True
         if hasattr(self, 'w0_seed'):
             if self.w0_seed != args['w0_seed']:
                 init_net = True
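The new `w_load` check uses `is not` (object identity) rather than `!=`: for tensors, `!=` is elementwise and yields a tensor whose truth value is ambiguous inside `if`, whereas identity comparison is cheap and triggers re-initialization whenever a different tensor object is passed, even one with equal values. For example:

import torch

a = torch.zeros(3)
b = torch.zeros(3)
print(a is not b)  # True: distinct objects, even though the values match
print(a != b)      # tensor([False, False, False]) -- not usable in `if` directly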
@@ -128,8 +126,7 @@ class Renderer:
 
         if self._is_timing and not self._disable_timing:
             self._end_event.synchronize()
-            res.render_time = self._start_event.elapsed_time(
-                self._end_event) * 1e-3
+            res.render_time = self._start_event.elapsed_time(self._end_event) * 1e-3
             self._is_timing = False
         return res
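The render-time measurement is the standard CUDA event pattern; `elapsed_time` returns milliseconds, hence the `* 1e-3` to report seconds. A minimal sketch, CUDA only:

import torch

if torch.cuda.is_available():
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    a = torch.randn(1024, 1024, device='cuda')
    b = a @ a                      # some GPU work to time
    end.record()
    end.synchronize()              # wait for the GPU before reading the timer
    print(start.elapsed_time(end) * 1e-3, 'seconds')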
@@ -150,8 +147,7 @@ class Renderer:
             raise data
 
         orig_net = data[key]
-        cache_key = (orig_net, self._device, tuple(
-            sorted(tweak_kwargs.items())))
+        cache_key = (orig_net, self._device, tuple(sorted(tweak_kwargs.items())))
         net = self._networks.get(cache_key, None)
         if net is None:
             try:
@@ -167,11 +163,9 @@ class Renderer:
                 print(data[key].init_args)
                 print(data[key].init_kwargs)
                 if 'stylegan_human' in pkl:
-                    net = Generator(
-                        *data[key].init_args, **data[key].init_kwargs, square=False, padding=True)
+                    net = Generator(*data[key].init_args, **data[key].init_kwargs, square=False, padding=True)
                 else:
-                    net = Generator(*data[key].init_args,
-                                    **data[key].init_kwargs)
+                    net = Generator(*data[key].init_args, **data[key].init_kwargs)
                 net.load_state_dict(data[key].state_dict())
                 net.to(self._device)
             except:
@@ -212,27 +206,25 @@ class Renderer:
         return x
 
     def init_network(self, res,
-                     pkl=None,
-                     w0_seed=0,
-                     w_load=None,
-                     w_plus=True,
-                     noise_mode='const',
-                     trunc_psi=0.7,
-                     trunc_cutoff=None,
-                     input_transform=None,
-                     lr=0.001,
-                     **kwargs
-                     ):
+        pkl = None,
+        w0_seed = 0,
+        w_load = None,
+        w_plus = True,
+        noise_mode = 'const',
+        trunc_psi = 0.7,
+        trunc_cutoff = None,
+        input_transform = None,
+        lr = 0.001,
+        **kwargs
+    ):
         # Dig up network details.
         self.pkl = pkl
         G = self.get_network(pkl, 'G_ema')
         self.G = G
         res.img_resolution = G.img_resolution
         res.num_ws = G.num_ws
-        res.has_noise = any('noise_const' in name for name,
-                            _buf in G.synthesis.named_buffers())
-        res.has_input_transform = (
-            hasattr(G.synthesis, 'input') and hasattr(G.synthesis.input, 'transform'))
+        res.has_noise = any('noise_const' in name for name, _buf in G.synthesis.named_buffers())
+        res.has_input_transform = (hasattr(G.synthesis, 'input') and hasattr(G.synthesis.input, 'transform'))
 
         # Set input transform.
         if res.has_input_transform:
@@ -250,13 +242,11 @@ class Renderer:
 
         if self.w_load is None:
             # Generate random latents.
-            z = torch.from_numpy(np.random.RandomState(
-                w0_seed).randn(1, 512)).to(self._device).float()
+            z = torch.from_numpy(np.random.RandomState(w0_seed).randn(1, 512)).to(self._device, dtype=self._dtype)
 
             # Run mapping network.
             label = torch.zeros([1, G.c_dim], device=self._device)
-            w = G.mapping(z, label, truncation_psi=trunc_psi,
-                          truncation_cutoff=trunc_cutoff)
+            w = G.mapping(z, label, truncation_psi=trunc_psi, truncation_cutoff=trunc_cutoff)
         else:
             w = self.w_load.clone().to(self._device)
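Latent initialization stays deterministic: a fixed `w0_seed` feeds `np.random.RandomState`, so the same seed always produces the same `z` (now created directly in `self._dtype` instead of via a hard-coded `.float()`). For instance:

import numpy as np
import torch

w0_seed = 0
z1 = torch.from_numpy(np.random.RandomState(w0_seed).randn(1, 512))
z2 = torch.from_numpy(np.random.RandomState(w0_seed).randn(1, 512))
assert torch.equal(z1, z2)  # same seed, same latent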
@@ -280,34 +270,34 @@ class Renderer:
         print(' Remain feat_refs and points0_pt')
 
     def _render_drag_impl(self, res,
-                          points=[],
-                          targets=[],
-                          mask=None,
-                          lambda_mask=10,
-                          reg=0,
-                          feature_idx=5,
-                          r1=3,
-                          r2=12,
-                          random_seed=0,
-                          noise_mode='const',
-                          trunc_psi=0.7,
-                          force_fp32=False,
-                          layer_name=None,
-                          sel_channels=3,
-                          base_channel=0,
-                          img_scale_db=0,
-                          img_normalize=False,
-                          untransform=False,
-                          is_drag=False,
-                          reset=False,
-                          to_pil=False,
-                          **kwargs
-                          ):
+        points = [],
+        targets = [],
+        mask = None,
+        lambda_mask = 10,
+        reg = 0,
+        feature_idx = 5,
+        r1 = 3,
+        r2 = 12,
+        random_seed = 0,
+        noise_mode = 'const',
+        trunc_psi = 0.7,
+        force_fp32 = False,
+        layer_name = None,
+        sel_channels = 3,
+        base_channel = 0,
+        img_scale_db = 0,
+        img_normalize = False,
+        untransform = False,
+        is_drag = False,
+        reset = False,
+        to_pil = False,
+        **kwargs
+    ):
         G = self.G
         ws = self.w
         if ws.dim() == 2:
-            ws = ws.unsqueeze(1).repeat(1, 6, 1)
-            ws = torch.cat([ws[:, :6, :], self.w0[:, 6:, :]], dim=1)
+            ws = ws.unsqueeze(1).repeat(1,6,1)
+            ws = torch.cat([ws[:,:6,:], self.w0[:,6:,:]], dim=1)
         if hasattr(self, 'points'):
             if len(points) != len(self.points):
                 reset = True
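When the stored latent is a single 512-dim vector, `_render_drag_impl` expands it to W+ form and splices it with the frozen initial code: only the first 6 style layers follow the optimized latent, while the remaining layers stay pinned to `self.w0`. Shape-wise (18 layers here is illustrative; the real count comes from `G.num_ws`):

import torch

num_ws = 18                                  # illustrative; actual value is G.num_ws
w = torch.randn(1, 512)                      # optimized latent
w0 = torch.randn(1, num_ws, 512)             # frozen initial W+ code
ws = w.unsqueeze(1).repeat(1, 6, 1)          # [1, 6, 512]
ws = torch.cat([ws[:, :6, :], w0[:, 6:, :]], dim=1)
print(ws.shape)                              # torch.Size([1, 18, 512])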
@@ -318,8 +308,7 @@ class Renderer:
 
         # Run synthesis network.
         label = torch.zeros([1, G.c_dim], device=self._device)
-        img, feat = G(ws, label, truncation_psi=trunc_psi,
-                      noise_mode=noise_mode, input_is_w=True, return_feature=True)
+        img, feat = G(ws, label, truncation_psi=trunc_psi, noise_mode=noise_mode, input_is_w=True, return_feature=True)
 
         h, w = G.img_resolution, G.img_resolution
 
@@ -327,17 +316,14 @@ class Renderer:
         X = torch.linspace(0, h, h)
         Y = torch.linspace(0, w, w)
         xx, yy = torch.meshgrid(X, Y)
-        feat_resize = F.interpolate(
-            feat[feature_idx], [h, w], mode='bilinear')
+        feat_resize = F.interpolate(feat[feature_idx], [h, w], mode='bilinear')
         if self.feat_refs is None:
-            self.feat0_resize = F.interpolate(
-                feat[feature_idx].detach(), [h, w], mode='bilinear')
+            self.feat0_resize = F.interpolate(feat[feature_idx].detach(), [h, w], mode='bilinear')
             self.feat_refs = []
             for point in points:
                 py, px = round(point[0]), round(point[1])
-                self.feat_refs.append(self.feat0_resize[:, :, py, px])
-            self.points0_pt = torch.Tensor(points).unsqueeze(
-                0).to(self._device)  # 1, N, 2
+                self.feat_refs.append(self.feat0_resize[:,:,py,px])
+            self.points0_pt = torch.Tensor(points).unsqueeze(0).to(self._device) # 1, N, 2
 
         # Point tracking with feature matching
        with torch.no_grad():
347
  down = min(point[0] + r + 1, h)
348
  left = max(point[1] - r, 0)
349
  right = min(point[1] + r + 1, w)
350
- feat_patch = feat_resize[:, :, up:down, left:right]
351
- L2 = torch.linalg.norm(
352
- feat_patch - self.feat_refs[j].reshape(1, -1, 1, 1), dim=1)
353
- _, idx = torch.min(L2.view(1, -1), -1)
354
  width = right - left
355
- point = [idx.item() // width + up, idx.item() %
356
- width + left]
357
  points[j] = point
358
 
359
  res.points = [[point[0], point[1]] for point in points]
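Point tracking here is a nearest-neighbour search: within a (2r+1)-sized window around each handle, pick the pixel whose feature vector is closest in L2 to the reference feature captured at initialization. A self-contained toy version of the same computation:

import torch

C, H, W = 8, 32, 32
feat_resize = torch.randn(1, C, H, W)
ref = torch.randn(1, C)                      # stands in for self.feat_refs[j]
point, r = [16, 16], 3
up, down = max(point[0] - r, 0), min(point[0] + r + 1, H)
left, right = max(point[1] - r, 0), min(point[1] + r + 1, W)
patch = feat_resize[:, :, up:down, left:right]
L2 = torch.linalg.norm(patch - ref.reshape(1, -1, 1, 1), dim=1)
_, idx = torch.min(L2.view(1, -1), -1)
width = right - left
print([idx.item() // width + up, idx.item() % width + left])  # tracked point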
@@ -362,31 +346,24 @@ class Renderer:
         loss_motion = 0
         res.stop = True
         for j, point in enumerate(points):
-            direction = torch.Tensor(
-                [targets[j][1] - point[1], targets[j][0] - point[0]])
+            direction = torch.Tensor([targets[j][1] - point[1], targets[j][0] - point[0]])
             if torch.linalg.norm(direction) > max(2 / 512 * h, 2):
                 res.stop = False
             if torch.linalg.norm(direction) > 1:
-                distance = (
-                    (xx.to(self._device) - point[0])**2 + (yy.to(self._device) - point[1])**2)**0.5
+                distance = ((xx.to(self._device) - point[0])**2 + (yy.to(self._device) - point[1])**2)**0.5
                 relis, reljs = torch.where(distance < round(r1 / 512 * h))
-                direction = direction / \
-                    (torch.linalg.norm(direction) + 1e-7)
+                direction = direction / (torch.linalg.norm(direction) + 1e-7)
                 gridh = (relis-direction[1]) / (h-1) * 2 - 1
                 gridw = (reljs-direction[0]) / (w-1) * 2 - 1
-                grid = torch.stack(
-                    [gridw, gridh], dim=-1).unsqueeze(0).unsqueeze(0)
-                target = F.grid_sample(
-                    feat_resize.float(), grid, align_corners=True).squeeze(2)
-                loss_motion += F.l1_loss(
-                    feat_resize[:, :, relis, reljs], target.detach())
+                grid = torch.stack([gridw,gridh], dim=-1).unsqueeze(0).unsqueeze(0)
+                target = F.grid_sample(feat_resize.float(), grid, align_corners=True).squeeze(2)
+                loss_motion += F.l1_loss(feat_resize[:,:,relis,reljs], target.detach())
 
         loss = loss_motion
         if mask is not None:
             if mask.min() == 0 and mask.max() == 1:
                 mask_usq = mask.to(self._device).unsqueeze(0).unsqueeze(0)
-                loss_fix = F.l1_loss(
-                    feat_resize * mask_usq, self.feat0_resize * mask_usq)
+                loss_fix = F.l1_loss(feat_resize * mask_usq, self.feat0_resize * mask_usq)
                 loss += lambda_mask * loss_fix
 
         loss += reg * F.l1_loss(ws, self.w0) # latent code regularization
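The motion term pulls features within radius `r1` of each handle one normalized step along the drag direction: `F.grid_sample` expects coordinates in [-1, 1], which is what the `gridh`/`gridw` lines compute. A tiny self-contained check of that coordinate convention (sampling one unshifted pixel):

import torch
import torch.nn.functional as F

H = W = 8
feat = torch.arange(H * W, dtype=torch.float32).reshape(1, 1, H, W)
ys, xs = torch.tensor([3.0]), torch.tensor([5.0])   # pixel coordinates to sample
gridh = ys / (H - 1) * 2 - 1
gridw = xs / (W - 1) * 2 - 1
grid = torch.stack([gridw, gridh], dim=-1).unsqueeze(0).unsqueeze(0)  # [1, 1, N, 2]
out = F.grid_sample(feat, grid, align_corners=True)
print(out)  # 29.0 == feat[0, 0, 3, 5]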
@@ -398,15 +375,14 @@ class Renderer:
         # Scale and convert to uint8.
         img = img[0]
         if img_normalize:
-            img = img / img.norm(float('inf'),
-                                 dim=[1, 2], keepdim=True).clip(1e-8, 1e8)
+            img = img / img.norm(float('inf'), dim=[1,2], keepdim=True).clip(1e-8, 1e8)
         img = img * (10 ** (img_scale_db / 20))
-        img = (img * 127.5 + 128).clamp(0,
-                                        255).to(torch.uint8).permute(1, 2, 0)
+        img = (img * 127.5 + 128).clamp(0, 255).to(torch.uint8).permute(1, 2, 0)
         if to_pil:
             from PIL import Image
             img = img.cpu().numpy()
             img = Image.fromarray(img)
         res.image = img
+        res.w = ws.detach().cpu().numpy()
 
-# ----------------------------------------------------------------------------
+#----------------------------------------------------------------------------
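The uint8 conversion assumes synthesis output roughly in [-1, 1] and maps it onto [0, 255]; the new `res.w` export is what the capture widget's `np.save` call above picks up. The mapping in isolation:

import torch

img = torch.tensor([-1.0, 0.0, 1.0])
print((img * 127.5 + 128).clamp(0, 255).to(torch.uint8))  # tensor([  0, 128, 255], dtype=torch.uint8)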
 