import copy import os import random import urllib.request import torch import torch.nn.functional as FF import torch.optim from torchvision import utils from tqdm import tqdm from stylegan2.model import Generator class DownloadProgressBar(tqdm): def update_to(self, b=1, bsize=1, tsize=None): if tsize is not None: self.total = tsize self.update(b * bsize - self.n) def get_path(base_path): BASE_DIR = os.path.join('checkpoints') save_path = os.path.join(BASE_DIR, base_path) if not os.path.exists(save_path): url = f"https://huggingface.co/aaronb/StyleGAN2/resolve/main/{base_path}" print(f'{base_path} not found') print('Try to download from huggingface: ', url) os.makedirs(os.path.dirname(save_path), exist_ok=True) download_url(url, save_path) print('Downloaded to ', save_path) return save_path def download_url(url, output_path): with DownloadProgressBar(unit='B', unit_scale=True, miniters=1, desc=url.split('/')[-1]) as t: urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to) class CustomGenerator(Generator): def prepare( self, styles, inject_index=None, truncation=1, truncation_latent=None, input_is_latent=False, noise=None, randomize_noise=True, ): if not input_is_latent: styles = [self.style(s) for s in styles] if noise is None: if randomize_noise: noise = [None] * self.num_layers else: noise = [ getattr(self.noises, f"noise_{i}") for i in range(self.num_layers) ] if truncation < 1: style_t = [] for style in styles: style_t.append( truncation_latent + truncation * (style - truncation_latent) ) styles = style_t if len(styles) < 2: inject_index = self.n_latent if styles[0].ndim < 3: latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) else: latent = styles[0] else: if inject_index is None: inject_index = random.randint(1, self.n_latent - 1) latent = styles[0].unsqueeze(1).repeat(1, inject_index, 1) latent2 = styles[1].unsqueeze(1).repeat(1, self.n_latent - inject_index, 1) latent = torch.cat([latent, latent2], 1) return latent, noise def generate( self, latent, noise, ): out = self.input(latent) out = self.conv1(out, latent[:, 0], noise=noise[0]) skip = self.to_rgb1(out, latent[:, 1]) i = 1 for conv1, conv2, noise1, noise2, to_rgb in zip( self.convs[::2], self.convs[1::2], noise[1::2], noise[2::2], self.to_rgbs ): out = conv1(out, latent[:, i], noise=noise1) out = conv2(out, latent[:, i + 1], noise=noise2) skip = to_rgb(out, latent[:, i + 2], skip) if out.shape[-1] == 256: F = out i += 2 image = skip F = FF.interpolate(F, image.shape[-2:], mode='bilinear') return image, F def stylegan2( size=1024, channel_multiplier=2, latent=512, n_mlp=8, ckpt='stylegan2-ffhq-config-f.pt' ): g_ema = CustomGenerator(size, latent, n_mlp, channel_multiplier=channel_multiplier) checkpoint = torch.load(get_path(ckpt)) g_ema.load_state_dict(checkpoint["g_ema"], strict=False) g_ema.requires_grad_(False) g_ema.eval() return g_ema def bilinear_interpolate_torch(im, y, x): """ im : B,C,H,W y : 1,numPoints -- pixel location y float x : 1,numPOints -- pixel location y float """ device = im.device x0 = torch.floor(x).long().to(device) x1 = x0 + 1 y0 = torch.floor(y).long().to(device) y1 = y0 + 1 wa = ((x1.float() - x) * (y1.float() - y)).to(device) wb = ((x1.float() - x) * (y - y0.float())).to(device) wc = ((x - x0.float()) * (y1.float() - y)).to(device) wd = ((x - x0.float()) * (y - y0.float())).to(device) # Instead of clamp x1 = x1 - torch.floor(x1 / im.shape[3]).int().to(device) y1 = y1 - torch.floor(y1 / im.shape[2]).int().to(device) Ia = im[:, :, y0, x0] Ib = im[:, :, y1, x0] Ic = im[:, :, y0, x1] Id = im[:, :, y1, x1] return Ia * wa + Ib * wb + Ic * wc + Id * wd def drag_gan(g_ema, latent: torch.Tensor, noise, F, handle_points, target_points, mask, max_iters=1000): handle_points0 = copy.deepcopy(handle_points) n = len(handle_points) r1, r2, lam, d = 3, 12, 20, 1 def neighbor(x, y, d): points = [] for i in range(x - d, x + d): for j in range(y - d, y + d): points.append(torch.tensor([i, j]).float().to(latent.device)) return points F0 = F.detach().clone() latent_trainable = latent[:, :6, :].detach().clone().requires_grad_(True) latent_untrainable = latent[:, 6:, :].detach().clone().requires_grad_(False) optimizer = torch.optim.Adam([latent_trainable], lr=2e-3) for iter in range(max_iters): for s in range(1): optimizer.zero_grad() latent = torch.cat([latent_trainable, latent_untrainable], dim=1) sample2, F2 = g_ema.generate(latent, noise) # motion supervision loss = 0 for i in range(n): pi, ti = handle_points[i], target_points[i] di = (ti - pi) / torch.sum((ti - pi)**2) for qi in neighbor(int(pi[0]), int(pi[1]), r1): # f1 = F[..., int(qi[0]), int(qi[1])] # f2 = F2[..., int(qi[0] + di[0]), int(qi[1] + di[1])] f1 = bilinear_interpolate_torch(F2, qi[0], qi[1]).detach() f2 = bilinear_interpolate_torch(F2, qi[0] + di[0], qi[1] + di[1]) loss += FF.l1_loss(f2, f1) if mask is not None: loss += ((F2 - F0) * (1 - mask)).abs().mean() * lam loss.backward() optimizer.step() # point tracking with torch.no_grad(): sample2, F2 = g_ema.generate(latent, noise) for i in range(n): pi = handle_points0[i] # f = F0[..., int(pi[0]), int(pi[1])] f0 = bilinear_interpolate_torch(F0, pi[0], pi[1]) minv = 1e9 minx = 1e9 miny = 1e9 for qi in neighbor(int(handle_points[i][0]), int(handle_points[i][1]), r2): # f2 = F2[..., int(qi[0]), int(qi[1])] f2 = bilinear_interpolate_torch(F2, qi[0], qi[1]) v = torch.norm(f2 - f0, p=1) if v < minv: minv = v minx = int(qi[0]) miny = int(qi[1]) handle_points[i][0] = minx handle_points[i][1] = miny F = F2.detach().clone() if iter % 1 == 0: print(iter, loss.item(), handle_points, target_points) # p = handle_points[0].int() # sample2[0, :, p[0] - 5:p[0] + 5, p[1] - 5:p[1] + 5] = sample2[0, :, p[0] - 5:p[0] + 5, p[1] - 5:p[1] + 5] * 0 # t = target_points[0].int() # sample2[0, :, t[0] - 5:t[0] + 5, t[1] - 5:t[1] + 5] = sample2[0, :, t[0] - 5:t[0] + 5, t[1] - 5:t[1] + 5] * 255 # sample2[0, :, 210, 134] = sample2[0, :, 210, 134] * 0 # utils.save_image(sample2, "test2.png", normalize=True, range=(-1, 1)) yield sample2, latent, F2, handle_points