faces-through-time / color_transfer_loss.py
echen01
working demo
2fec875
from typing import List, Optional
import torch
from torch import nn
from torch.nn.functional import (
smooth_l1_loss,
)
def flatten_CHW(im: torch.Tensor) -> torch.Tensor:
    """Collapse every non-batch axis of ``im`` into a single axis.

    The leading axis is kept as the batch axis; the remaining axes are
    merged, e.g. (B, C, H, W) -> (B, C * H * W).
    """
    batch = im.size(0)
    return im.reshape((batch, -1))
def stddev(x: torch.Tensor) -> torch.Tensor:
    """Root-mean-square of ``x`` along its last axis.

    This equals the standard deviation only when the input already has
    zero mean along that axis; no mean is subtracted here.

    Args:
        x: tensor of shape (B, -1), assumed to be mean-normalized.

    Returns:
        Tensor of shape (B) holding one value per row.
    """
    mean_of_squares = (x * x).mean(dim=-1)
    return mean_of_squares.sqrt()
def gram_matrix(input_: torch.Tensor) -> torch.Tensor:
    """Compute the batched, normalized Gram matrix of a feature map.

    Args:
        input_: tensor whose first two axes are (B, C); all remaining axes
            are flattened into a single axis of length N.

    Returns:
        Tensor of shape (B, C, C) equal to ``F @ F^T / (C * N)``, where
        ``F`` is the (B, C, N) flattened input.
    """
    B, C = input_.shape[:2]
    # reshape (rather than view) so that non-contiguous inputs, e.g. the
    # result of a transpose/permute, are accepted instead of raising.
    features = input_.reshape(B, C, -1)
    N = features.shape[-1]
    G = torch.bmm(features, features.transpose(1, 2))  # (B, C, C)
    return G.div(C * N)
class ColorTransferLoss(nn.Module):
    """Penalize the gram matrix difference between StyleGAN2's ToRGB outputs.

    At construction time the Gram matrix of every reference RGB tensor is
    computed and stored; ``forward`` sums the smooth-L1 distance between
    those stored matrices and the Gram matrices of the tensors it is given.

    Note: ``self.stds`` and ``self.grams`` are plain Python lists, not
    registered buffers, so they stay on whatever device ``init_rgbs`` was on.
    """

    def __init__(
        self,
        init_rgbs,
        scale_rgb: bool = False
    ):
        """
        Args:
            init_rgbs: iterable of reference tensors, one per level
                (presumably (B, C, H, W) each; confirm against caller).
            scale_rgb: when True, every tensor is divided by the per-sample
                RMS of the corresponding reference tensor before its Gram
                matrix is taken; when False no scaling is applied.
        """
        super().__init__()

        with torch.no_grad():
            init_feats = [x.detach() for x in init_rgbs]
            # Per-level divisor: (B, 1, ..., 1) tensor, or the scalar 1.
            self.stds = [
                self._per_sample_std(rgb) if scale_rgb else 1
                for rgb in init_feats
            ]
            self.grams = [
                gram_matrix(rgb / std)
                for rgb, std in zip(init_feats, self.stds)
            ]

    @staticmethod
    def _per_sample_std(rgb: torch.Tensor) -> torch.Tensor:
        """Return the per-sample RMS of ``rgb`` shaped to broadcast over it."""
        std = stddev(flatten_CHW(rgb))  # (B,)
        # A bare (B,) tensor would broadcast against the LAST axis of rgb,
        # which is wrong (or an error) for B > 1; add trailing singleton
        # axes so each sample is divided by its own value.
        return std.reshape(-1, *([1] * (rgb.dim() - 1)))

    def forward(self, rgbs: List[torch.Tensor], level: Optional[int] = None):
        """Sum the Gram-matrix smooth-L1 losses over the first ``level`` levels.

        Args:
            rgbs: list of tensors, ordered like the ``init_rgbs`` given to
                the constructor.
            level: number of leading levels to include; ``None`` means all
                levels stored at construction time.

        Returns:
            The accumulated loss. If no level is processed this is the
            Python int ``0`` rather than a tensor.
        """
        if level is None:
            level = len(self.grams)

        loss = 0
        for rgb, std, target in zip(
            rgbs[:level], self.stds[:level], self.grams[:level]
        ):
            loss = loss + smooth_l1_loss(gram_matrix(rgb / std), target)

        return loss