radames (HF staff) committed on
Commit c760a5e
• 1 Parent(s): 6d61f2f

add timeout generator

Files changed (2)
  1. visualizer_drag_gradio.py +1 -1
  2. viz/renderer.py +111 -81
visualizer_drag_gradio.py CHANGED
@@ -562,7 +562,7 @@ with gr.Blocks() as app:
         if IS_SPACE and time.time() - last_time > TIMEOUT:
             print('Timeout break!')
             break
-        if global_state["temporal_params"]["stop"]:
+        if global_state["temporal_params"]["stop"] or global_state['generator_params']["stop"]:
             break
 
         # do drage here!
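
The new condition lets the renderer end the drag loop on its own, in addition to the user's stop button and the Space timeout. A minimal sketch of this cooperative-stop pattern, with illustrative names and a fake optimization step (not the app's actual code):

import time

IS_SPACE = True
TIMEOUT = 5.0  # seconds; stand-in for the Space's wall-clock guard

global_state = {
    'temporal_params': {'stop': False},   # set by the UI's stop button
    'generator_params': {'stop': False},  # set from the renderer's res.stop
}

def drag_step(step):
    # Stand-in for one drag-optimization step; pretends to converge at step 3.
    return {'stop': step >= 3}

last_time = time.time()
step = 0
while True:
    if IS_SPACE and time.time() - last_time > TIMEOUT:
        print('Timeout break!')
        break
    # Break on either flag, mirroring the one-line change above.
    if global_state['temporal_params']['stop'] or global_state['generator_params']['stop']:
        break
    res = drag_step(step)
    global_state['generator_params']['stop'] = res['stop']
    step += 1
print(f'stopped after {step} steps')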
viz/renderer.py CHANGED
@@ -20,9 +20,10 @@ import torch.nn.functional as F
 import matplotlib.cm
 import dnnlib
 from torch_utils.ops import upfirdn2d
-import legacy # pylint: disable=import-error
-
-#----------------------------------------------------------------------------
+import legacy  # pylint: disable=import-error
+
+# ----------------------------------------------------------------------------
+
 
 class CapturedException(Exception):
     def __init__(self, msg=None):
@@ -36,14 +37,16 @@ class CapturedException(Exception):
         assert isinstance(msg, str)
         super().__init__(msg)
 
-#----------------------------------------------------------------------------
+# ----------------------------------------------------------------------------
+
 
 class CaptureSuccess(Exception):
     def __init__(self, out):
         super().__init__()
         self.out = out
 
-#----------------------------------------------------------------------------
+# ----------------------------------------------------------------------------
+
 
 def add_watermark_np(input_image_array, watermark_text="AI Generated"):
     image = Image.fromarray(np.uint8(input_image_array)).convert("RGBA")
@@ -54,8 +57,10 @@ def add_watermark_np(input_image_array, watermark_text="AI Generated"):
     d = ImageDraw.Draw(txt)
 
     text_width, text_height = font.getsize(watermark_text)
-    text_position = (image.size[0] - text_width - 10, image.size[1] - text_height - 10)
-    text_color = (255, 255, 255, 128) # white color with the alpha channel set to semi-transparent
+    text_position = (image.size[0] - text_width -
+                     10, image.size[1] - text_height - 10)
+    # white color with the alpha channel set to semi-transparent
+    text_color = (255, 255, 255, 128)
 
     # Draw the text onto the text canvas
     d.text(text_position, watermark_text, font=font, fill=text_color)
@@ -65,22 +70,24 @@ def add_watermark_np(input_image_array, watermark_text="AI Generated"):
     watermarked_array = np.array(watermarked)
     return watermarked_array
 
-#----------------------------------------------------------------------------
+# ----------------------------------------------------------------------------
+
 
 class Renderer:
     def __init__(self, disable_timing=False):
-        self._device = torch.device('cuda' if torch.cuda.is_available() else 'mps' if torch.backends.mps.is_available() else 'cpu')
-        self._dtype = torch.float32 if self._device.type == 'mps' else torch.float64
-        self._pkl_data = dict() # {pkl: dict | CapturedException, ...}
-        self._networks = dict() # {cache_key: torch.nn.Module, ...}
-        self._pinned_bufs = dict() # {(shape, dtype): torch.Tensor, ...}
-        self._cmaps = dict() # {name: torch.Tensor, ...}
-        self._is_timing = False
+        self._device = torch.device('cuda' if torch.cuda.is_available(
+        ) else 'mps' if torch.backends.mps.is_available() else 'cpu')
+        self._dtype = torch.float32 if self._device.type == 'mps' else torch.float64
+        self._pkl_data = dict()  # {pkl: dict | CapturedException, ...}
+        self._networks = dict()  # {cache_key: torch.nn.Module, ...}
+        self._pinned_bufs = dict()  # {(shape, dtype): torch.Tensor, ...}
+        self._cmaps = dict()  # {name: torch.Tensor, ...}
+        self._is_timing = False
         if not disable_timing:
-            self._start_event = torch.cuda.Event(enable_timing=True)
-            self._end_event = torch.cuda.Event(enable_timing=True)
+            self._start_event = torch.cuda.Event(enable_timing=True)
+            self._end_event = torch.cuda.Event(enable_timing=True)
         self._disable_timing = disable_timing
-        self._net_layers = dict() # {cache_key: [dnnlib.EasyDict, ...], ...}
+        self._net_layers = dict()  # {cache_key: [dnnlib.EasyDict, ...], ...}
 
     def render(self, **args):
         if self._disable_timing:
@@ -126,7 +133,8 @@ class Renderer:
 
         if self._is_timing and not self._disable_timing:
             self._end_event.synchronize()
-            res.render_time = self._start_event.elapsed_time(self._end_event) * 1e-3
+            res.render_time = self._start_event.elapsed_time(
+                self._end_event) * 1e-3
         self._is_timing = False
         return res
 
@@ -147,7 +155,8 @@
             raise data
 
         orig_net = data[key]
-        cache_key = (orig_net, self._device, tuple(sorted(tweak_kwargs.items())))
+        cache_key = (orig_net, self._device, tuple(
+            sorted(tweak_kwargs.items())))
         net = self._networks.get(cache_key, None)
         if net is None:
             try:
@@ -163,9 +172,11 @@
                 print(data[key].init_args)
                 print(data[key].init_kwargs)
                 if 'stylegan_human' in pkl:
-                    net = Generator(*data[key].init_args, **data[key].init_kwargs, square=False, padding=True)
+                    net = Generator(
+                        *data[key].init_args, **data[key].init_kwargs, square=False, padding=True)
                 else:
-                    net = Generator(*data[key].init_args, **data[key].init_kwargs)
+                    net = Generator(*data[key].init_args,
+                                    **data[key].init_kwargs)
                 net.load_state_dict(data[key].state_dict())
                 net.to(self._device)
             except:
@@ -206,26 +217,28 @@
         return x
 
     def init_network(self, res,
-        pkl = None,
-        w0_seed = 0,
-        w_load = None,
-        w_plus = True,
-        noise_mode = 'const',
-        trunc_psi = 0.7,
-        trunc_cutoff = None,
-        input_transform = None,
-        lr = 0.001,
-        **kwargs
-        ):
+                     pkl=None,
+                     w0_seed=0,
+                     w_load=None,
+                     w_plus=True,
+                     noise_mode='const',
+                     trunc_psi=0.7,
+                     trunc_cutoff=None,
+                     input_transform=None,
+                     lr=0.001,
+                     **kwargs
+                     ):
         # Dig up network details.
         self.pkl = pkl
         G = self.get_network(pkl, 'G_ema')
        self.G = G
        res.img_resolution = G.img_resolution
        res.num_ws = G.num_ws
-        res.has_noise = any('noise_const' in name for name, _buf in G.synthesis.named_buffers())
-        res.has_input_transform = (hasattr(G.synthesis, 'input') and hasattr(G.synthesis.input, 'transform'))
-
+        res.has_noise = any('noise_const' in name for name,
+                            _buf in G.synthesis.named_buffers())
+        res.has_input_transform = (
+            hasattr(G.synthesis, 'input') and hasattr(G.synthesis.input, 'transform'))
+        res.stop = False
         # Set input transform.
         if res.has_input_transform:
             m = np.eye(3)
@@ -242,11 +255,13 @@
 
         if self.w_load is None:
             # Generate random latents.
-            z = torch.from_numpy(np.random.RandomState(w0_seed).randn(1, 512)).to(self._device, dtype=self._dtype)
+            z = torch.from_numpy(np.random.RandomState(w0_seed).randn(
+                1, 512)).to(self._device, dtype=self._dtype)
 
             # Run mapping network.
             label = torch.zeros([1, G.c_dim], device=self._device)
-            w = G.mapping(z, label, truncation_psi=trunc_psi, truncation_cutoff=trunc_cutoff)
+            w = G.mapping(z, label, truncation_psi=trunc_psi,
+                          truncation_cutoff=trunc_cutoff)
         else:
             w = self.w_load.clone().to(self._device)
 
@@ -270,34 +285,34 @@
             print(' Remain feat_refs and points0_pt')
 
     def _render_drag_impl(self, res,
-        points = [],
-        targets = [],
-        mask = None,
-        lambda_mask = 10,
-        reg = 0,
-        feature_idx = 5,
-        r1 = 3,
-        r2 = 12,
-        random_seed = 0,
-        noise_mode = 'const',
-        trunc_psi = 0.7,
-        force_fp32 = False,
-        layer_name = None,
-        sel_channels = 3,
-        base_channel = 0,
-        img_scale_db = 0,
-        img_normalize = False,
-        untransform = False,
-        is_drag = False,
-        reset = False,
-        to_pil = False,
-        **kwargs
-        ):
+                          points=[],
+                          targets=[],
+                          mask=None,
+                          lambda_mask=10,
+                          reg=0,
+                          feature_idx=5,
+                          r1=3,
+                          r2=12,
+                          random_seed=0,
+                          noise_mode='const',
+                          trunc_psi=0.7,
+                          force_fp32=False,
+                          layer_name=None,
+                          sel_channels=3,
+                          base_channel=0,
+                          img_scale_db=0,
+                          img_normalize=False,
+                          untransform=False,
+                          is_drag=False,
+                          reset=False,
+                          to_pil=False,
+                          **kwargs
+                          ):
         G = self.G
         ws = self.w
         if ws.dim() == 2:
-            ws = ws.unsqueeze(1).repeat(1,6,1)
-            ws = torch.cat([ws[:,:6,:], self.w0[:,6:,:]], dim=1)
+            ws = ws.unsqueeze(1).repeat(1, 6, 1)
+            ws = torch.cat([ws[:, :6, :], self.w0[:, 6:, :]], dim=1)
         if hasattr(self, 'points'):
             if len(points) != len(self.points):
                 reset = True
@@ -308,7 +323,8 @@
 
         # Run synthesis network.
         label = torch.zeros([1, G.c_dim], device=self._device)
-        img, feat = G(ws, label, truncation_psi=trunc_psi, noise_mode=noise_mode, input_is_w=True, return_feature=True)
+        img, feat = G(ws, label, truncation_psi=trunc_psi,
+                      noise_mode=noise_mode, input_is_w=True, return_feature=True)
 
         h, w = G.img_resolution, G.img_resolution
 
@@ -316,14 +332,17 @@
         X = torch.linspace(0, h, h)
         Y = torch.linspace(0, w, w)
        xx, yy = torch.meshgrid(X, Y)
-        feat_resize = F.interpolate(feat[feature_idx], [h, w], mode='bilinear')
+        feat_resize = F.interpolate(
+            feat[feature_idx], [h, w], mode='bilinear')
         if self.feat_refs is None:
-            self.feat0_resize = F.interpolate(feat[feature_idx].detach(), [h, w], mode='bilinear')
+            self.feat0_resize = F.interpolate(
+                feat[feature_idx].detach(), [h, w], mode='bilinear')
             self.feat_refs = []
             for point in points:
                 py, px = round(point[0]), round(point[1])
-                self.feat_refs.append(self.feat0_resize[:,:,py,px])
-            self.points0_pt = torch.Tensor(points).unsqueeze(0).to(self._device) # 1, N, 2
+                self.feat_refs.append(self.feat0_resize[:, :, py, px])
+            self.points0_pt = torch.Tensor(points).unsqueeze(
+                0).to(self._device)  # 1, N, 2
 
         # Point tracking with feature matching
        with torch.no_grad():
@@ -333,11 +352,13 @@
                 down = min(point[0] + r + 1, h)
                 left = max(point[1] - r, 0)
                 right = min(point[1] + r + 1, w)
-                feat_patch = feat_resize[:,:,up:down,left:right]
-                L2 = torch.linalg.norm(feat_patch - self.feat_refs[j].reshape(1,-1,1,1), dim=1)
-                _, idx = torch.min(L2.view(1,-1), -1)
+                feat_patch = feat_resize[:, :, up:down, left:right]
+                L2 = torch.linalg.norm(
+                    feat_patch - self.feat_refs[j].reshape(1, -1, 1, 1), dim=1)
+                _, idx = torch.min(L2.view(1, -1), -1)
                 width = right - left
-                point = [idx.item() // width + up, idx.item() % width + left]
+                point = [idx.item() // width + up, idx.item() %
+                         width + left]
                 points[j] = point
 
         res.points = [[point[0], point[1]] for point in points]
@@ -346,24 +367,31 @@
         loss_motion = 0
         res.stop = True
         for j, point in enumerate(points):
-            direction = torch.Tensor([targets[j][1] - point[1], targets[j][0] - point[0]])
+            direction = torch.Tensor(
+                [targets[j][1] - point[1], targets[j][0] - point[0]])
            if torch.linalg.norm(direction) > max(2 / 512 * h, 2):
                res.stop = False
            if torch.linalg.norm(direction) > 1:
-                distance = ((xx.to(self._device) - point[0])**2 + (yy.to(self._device) - point[1])**2)**0.5
+                distance = (
+                    (xx.to(self._device) - point[0])**2 + (yy.to(self._device) - point[1])**2)**0.5
                relis, reljs = torch.where(distance < round(r1 / 512 * h))
-                direction = direction / (torch.linalg.norm(direction) + 1e-7)
+                direction = direction / \
+                    (torch.linalg.norm(direction) + 1e-7)
                gridh = (relis-direction[1]) / (h-1) * 2 - 1
                gridw = (reljs-direction[0]) / (w-1) * 2 - 1
-                grid = torch.stack([gridw,gridh], dim=-1).unsqueeze(0).unsqueeze(0)
-                target = F.grid_sample(feat_resize.float(), grid, align_corners=True).squeeze(2)
-                loss_motion += F.l1_loss(feat_resize[:,:,relis,reljs], target.detach())
+                grid = torch.stack(
+                    [gridw, gridh], dim=-1).unsqueeze(0).unsqueeze(0)
+                target = F.grid_sample(
+                    feat_resize.float(), grid, align_corners=True).squeeze(2)
+                loss_motion += F.l1_loss(
+                    feat_resize[:, :, relis, reljs], target.detach())
 
         loss = loss_motion
        if mask is not None:
            if mask.min() == 0 and mask.max() == 1:
                mask_usq = mask.to(self._device).unsqueeze(0).unsqueeze(0)
-                loss_fix = F.l1_loss(feat_resize * mask_usq, self.feat0_resize * mask_usq)
+                loss_fix = F.l1_loss(
+                    feat_resize * mask_usq, self.feat0_resize * mask_usq)
                loss += lambda_mask * loss_fix
 
        loss += reg * F.l1_loss(ws, self.w0) # latent code regularization
@@ -375,9 +403,11 @@
        # Scale and convert to uint8.
        img = img[0]
        if img_normalize:
-            img = img / img.norm(float('inf'), dim=[1,2], keepdim=True).clip(1e-8, 1e8)
+            img = img / img.norm(float('inf'),
+                                 dim=[1, 2], keepdim=True).clip(1e-8, 1e8)
        img = img * (10 ** (img_scale_db / 20))
-        img = (img * 127.5 + 128).clamp(0, 255).to(torch.uint8).permute(1, 2, 0)
+        img = (img * 127.5 + 128).clamp(0,
+                                        255).to(torch.uint8).permute(1, 2, 0)
        if to_pil:
            from PIL import Image
            img = img.cpu().numpy()
@@ -385,4 +415,4 @@
        res.image = img
        res.w = ws.detach().cpu().numpy()
 
-#----------------------------------------------------------------------------
+# ----------------------------------------------------------------------------
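
For readers skimming the reformatted tracking block above: for each handle point, the tracker searches a local window of the resized feature map for the pixel whose feature vector is nearest (in L2 distance) to that point's original feature. A self-contained toy version with random features and made-up sizes (the real code runs on generator activations):

import torch

# Toy point-tracking step: inside a (2r+1) x (2r+1) window, find the pixel
# whose feature vector best matches the reference feature.
torch.manual_seed(0)
C, h, w, r = 8, 64, 64, 12
feat_resize = torch.randn(1, C, h, w)  # stand-in for the resized feature map
point = [30, 40]                       # (row, col) of a handle point
feat_ref = feat_resize[:, :, 33, 42]   # pretend reference feature vector

up = max(point[0] - r, 0)
down = min(point[0] + r + 1, h)
left = max(point[1] - r, 0)
right = min(point[1] + r + 1, w)
feat_patch = feat_resize[:, :, up:down, left:right]
L2 = torch.linalg.norm(feat_patch - feat_ref.reshape(1, -1, 1, 1), dim=1)
_, idx = torch.min(L2.view(1, -1), -1)
width = right - left
point = [idx.item() // width + up, idx.item() % width + left]
print(point)  # recovers [33, 42], the best-matching location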
 
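The functional change hidden among the autopep8 reformatting is the stop signal itself: init_network now initializes res.stop = False, and _render_drag_impl leaves res.stop = True only when every handle point sits within max(2 / 512 * h, 2) pixels of its target. A sketch of that convergence test in isolation (hypothetical point data):

import torch

def drag_converged(points, targets, h):
    # Mirrors the res.stop logic: stop only when no point is farther than
    # max(2 / 512 * h, 2) pixels from its target; the threshold scales with
    # resolution but never drops below 2 pixels.
    stop = True
    for point, target in zip(points, targets):
        direction = torch.tensor([target[1] - point[1],
                                  target[0] - point[0]], dtype=torch.float32)
        if torch.linalg.norm(direction) > max(2 / 512 * h, 2):
            stop = False
    return stop

print(drag_converged([[10.0, 10.0]], [[40.0, 40.0]], h=512))  # False: far from target
print(drag_converged([[39.0, 39.5]], [[40.0, 40.0]], h=512))  # True: within 2 px

Combined with the generator_params["stop"] check added to the gradio loop, this ends a drag automatically at convergence instead of waiting for the timeout.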