|
|
|
|
|
|
|
|
|
|
|
|
| """Improved diffusion model architecture proposed in the paper
|
| "Analyzing and Improving the Training Dynamics of Diffusion Models"."""
|
|
|
| import numpy as np
|
| import torch
|
|
|
|
|
|
|
|
|
|
|
# Cache of constant tensors. The key holds the value's bytes plus every
# attribute that affects the resulting tensor, so an identical request
# returns the very same tensor object.
_constant_cache = dict()


def constant(value, shape=None, dtype=None, device=None, memory_format=None):
    """Return a cached tensor holding ``value``.

    The returned tensor is shared between all callers that request the same
    constant, so it must be treated as read-only.

    Args:
        value: Anything ``np.asarray`` accepts (scalar, list, ndarray, ...).
        shape: Optional shape to broadcast the value against.
        dtype: Tensor dtype; defaults to ``torch.get_default_dtype()``.
        device: Target device as a ``torch.device`` or anything
            ``torch.device()`` accepts (e.g. ``'cpu'``); defaults to the CPU.
        memory_format: Memory format handed to ``Tensor.contiguous``;
            defaults to ``torch.contiguous_format``.

    Returns:
        A contiguous tensor with the requested dtype and device.
    """
    value = np.asarray(value)
    if shape is not None:
        shape = tuple(shape)
    if dtype is None:
        dtype = torch.get_default_dtype()
    # Normalize the device so that 'cpu' and torch.device('cpu') map to the
    # same cache entry instead of creating duplicate tensors.
    device = torch.device('cpu' if device is None else device)
    if memory_format is None:
        memory_format = torch.contiguous_format

    key = (value.shape, value.dtype, value.tobytes(), shape, dtype, device, memory_format)
    tensor = _constant_cache.get(key)
    if tensor is None:
        tensor = torch.as_tensor(value.copy(), dtype=dtype, device=device)
        if shape is not None:
            # The placeholder only carries the target shape: an expanded
            # scalar is a zero-stride view, so no `shape`-sized scratch
            # buffer has to be allocated just to broadcast against it.
            placeholder = torch.empty(()).expand(shape)
            tensor, _ = torch.broadcast_tensors(tensor, placeholder)
        tensor = tensor.contiguous(memory_format=memory_format)
        _constant_cache[key] = tensor
    return tensor
|
|
|
|
|
def const_like(ref, value, shape=None, dtype=None, device=None, memory_format=None):
    """Return a cached constant that matches ``ref`` unless told otherwise.

    Args:
        ref: Object whose ``dtype`` and ``device`` attributes supply the
            defaults (read only when the matching argument is ``None``).
        value: Constant value, forwarded to ``constant``.
        shape: Optional broadcast shape, forwarded to ``constant``.
        dtype: Explicit dtype; ``None`` means ``ref.dtype``.
        device: Explicit device; ``None`` means ``ref.device``.
        memory_format: Forwarded to ``constant`` unchanged.

    Returns:
        Whatever ``constant`` returns for the resolved arguments.
    """
    return constant(
        value,
        shape=shape,
        dtype=ref.dtype if dtype is None else dtype,
        device=ref.device if device is None else device,
        memory_format=memory_format,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def normalize(x, dim=None, eps=1e-4):
    """Divide ``x`` by its scaled vector norm taken over ``dim``.

    The divisor is ``eps + norm * sqrt(norm.numel() / x.numel())``, i.e. the
    norm divided by the square root of the number of elements reduced per
    output entry, plus ``eps``.

    Args:
        x: Input tensor.
        dim: Dimension(s) to reduce over; ``None`` selects every dimension
            except the first.
        eps: Constant added to the divisor.

    Returns:
        Tensor with the shape of ``x``; the divisor is cast to ``x.dtype``
        before the division.
    """
    reduce_dims = list(range(1, x.ndim)) if dim is None else dim
    # The norm is accumulated in float32 regardless of the input dtype.
    magnitude = torch.linalg.vector_norm(x, dim=reduce_dims, keepdim=True, dtype=torch.float32)
    per_group_scale = np.sqrt(magnitude.numel() / x.numel())
    divisor = torch.add(eps, magnitude, alpha=per_group_scale)
    return x / divisor.to(x.dtype)
|
|
|
|
|
class Normalize(torch.nn.Module):
    """Module wrapper that applies ``normalize`` with stored settings."""

    def __init__(self, dim=None, eps=1e-4):
        """Store the ``dim`` and ``eps`` arguments later passed to ``normalize``."""
        super().__init__()
        self.eps = eps
        self.dim = dim

    def forward(self, x):
        """Return ``normalize(x)`` using the stored ``dim`` and ``eps``."""
        settings = dict(dim=self.dim, eps=self.eps)
        return normalize(x, **settings)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def resample(x, f=(1, 1), mode='keep'):
    """Resample ``x`` by a factor of two with a separable FIR filter.

    Args:
        x: Input tensor; dimension 1 is used as the channel count for the
            depthwise (``groups=c``) 2-D convolution, so an
            ``(N, C, H, W)`` layout is expected.
        f: 1-D filter taps with a non-zero, even length. The taps are
            normalized to sum to one and the 2-D kernel is their outer
            product.
        mode: ``'keep'`` returns ``x`` untouched, ``'down'`` applies a
            stride-2 convolution, ``'up'`` applies a stride-2 transposed
            convolution with the kernel scaled by 4.

    Returns:
        ``x`` itself for ``'keep'``, otherwise the resampled tensor.

    Raises:
        ValueError: If ``mode`` is not one of the three supported values or
            ``f`` is not a non-empty 1-D filter of even length.
    """
    if mode == 'keep':
        return x
    # Validate with real exceptions: asserts vanish under `python -O`, which
    # would let an unknown mode silently fall through to the 'up' path.
    if mode not in ('down', 'up'):
        raise ValueError(f"mode must be 'keep', 'down' or 'up', got {mode!r}")
    taps = np.asarray(f, dtype=np.float32)
    if taps.ndim != 1 or len(taps) == 0 or len(taps) % 2 != 0:
        raise ValueError('f must be a non-empty 1-D filter with an even number of taps')

    pad = (len(taps) - 1) // 2
    taps = taps / taps.sum()
    kernel = np.outer(taps, taps)[np.newaxis, np.newaxis, :, :]
    kernel = const_like(x, kernel)
    c = x.shape[1]
    if mode == 'down':
        return torch.nn.functional.conv2d(
            x, kernel.tile([c, 1, 1, 1]), groups=c, stride=2, padding=(pad,))
    # mode == 'up': the kernel is scaled by 4 (stride 2 along each of the
    # two spatial dimensions) before the transposed convolution.
    return torch.nn.functional.conv_transpose2d(
        x, (kernel * 4).tile([c, 1, 1, 1]), groups=c, stride=2, padding=(pad,))
|
|
|
|
|
|
|
|
|
|
|
|
|
def mp_silu(x):
    """Apply SiLU to ``x`` and divide the result by the constant 0.596.

    NOTE(review): the ``mp`` prefix presumably stands for
    "magnitude-preserving" as in the paper named in the module docstring,
    with 0.596 compensating for the activation's output scale - confirm.
    """
    activated = torch.nn.functional.silu(x)
    return activated / 0.596
|
|
|
|
|
class MPSiLU(torch.nn.Module):
    """Stateless module form of ``mp_silu`` for use inside module containers."""

    def forward(self, x):
        """Return ``mp_silu(x)``."""
        result = mp_silu(x)
        return result
|
|
|
|
|
|
|
|
|
|
|
|
|
def mp_sum(a, b, t=0.5):
    """Blend ``a`` and ``b`` linearly and rescale the result.

    Computes ``a.lerp(b, t)``, i.e. ``(1 - t) * a + t * b``, and divides it
    by ``sqrt((1 - t)**2 + t**2)``.

    Args:
        a: First tensor (weight ``1 - t``).
        b: Second tensor (weight ``t``).
        t: Interpolation weight.

    Returns:
        The rescaled blend of ``a`` and ``b``.
    """
    blended = a.lerp(b, t)
    weight_norm = np.sqrt((1 - t)**2 + t**2)
    return blended / weight_norm
|
|
|
|
|
|
|
|
|
|
|
|
|
def mp_cat(a, b, dim=1, t=0.5):
    """Concatenate ``a`` and ``b`` along ``dim`` after weighting each input.

    With ``Na`` and ``Nb`` the sizes of the inputs along ``dim`` and
    ``C = sqrt((Na + Nb) / ((1 - t)**2 + t**2))``, ``a`` is multiplied by
    ``C / sqrt(Na) * (1 - t)`` and ``b`` by ``C / sqrt(Nb) * t``.

    Args:
        a: First tensor.
        b: Second tensor.
        dim: Dimension to concatenate along.
        t: Blend weight; ``1 - t`` applies to ``a`` and ``t`` to ``b``.

    Returns:
        The concatenation of the two weighted tensors.
    """
    size_a, size_b = a.shape[dim], b.shape[dim]
    common = np.sqrt((size_a + size_b) / ((1 - t)**2 + t**2))
    weight_a = common / np.sqrt(size_a) * (1 - t)
    weight_b = common / np.sqrt(size_b) * t
    weighted = [weight_a * a, weight_b * b]
    return torch.cat(weighted, dim=dim)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class MPConv1D(torch.nn.Module):
    """1-D convolution whose weight must be normalized once before use.

    The weight is a randomly initialized parameter of shape
    ``(out_channels, in_channels, kernel_size)``. ``forward`` asserts that
    ``remove_weight_norm()`` has been called first.
    """

    def __init__(self, in_channels, out_channels, kernel_size):
        """Create the random weight and mark it as not yet normalized."""
        super().__init__()
        self.out_channels = out_channels
        weight_shape = (out_channels, in_channels, kernel_size)
        self.weight = torch.nn.Parameter(torch.randn(*weight_shape))

        # forward() asserts on this flag; remove_weight_norm() sets it.
        self.weight_norm_removed = False

    def forward(self, x, gain=1):
        """Apply the layer to ``x`` with the weight multiplied by ``gain``.

        A 3-D weight (the shape built in ``__init__``) runs ``conv1d`` with
        padding ``kernel_size // 2``; a 2-D weight runs a plain matrix
        product ``x @ w.t()``.
        """
        assert self.weight_norm_removed, 'call remove_weight_norm() before inference'

        kernel = self.weight * gain
        if kernel.ndim != 2:
            assert kernel.ndim == 3
            half_width = kernel.shape[-1] // 2
            return torch.nn.functional.conv1d(x, kernel, padding=(half_width, ))
        return x @ kernel.t()

    def remove_weight_norm(self):
        """Write the normalized weight into the parameter in place.

        The weight is converted to float32, passed through ``normalize``,
        divided by the square root of the element count of one output
        channel's slice, cast back to the parameter dtype and copied into
        ``self.weight.data``.

        Returns:
            ``self``, so the call can be chained.
        """
        param = self.weight
        unit = normalize(param.to(torch.float32))
        scaled = unit / np.sqrt(unit[0].numel())
        param.data.copy_(scaled.to(param.dtype))

        self.weight_norm_removed = True
        return self
|
|
|