RMSnow's picture
init and interface
df2accb
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.nn.utils import weight_norm, remove_weight_norm, spectral_norm
from torch import nn
LRELU_SLOPE = 0.1
# This code is a refined MRD adopted from BigVGAN under the MIT License
# https://github.com/NVIDIA/BigVGAN
class DiscriminatorR(nn.Module):
def __init__(self, cfg, resolution):
super().__init__()
self.resolution = resolution
assert (
len(self.resolution) == 3
), "MRD layer requires list with len=3, got {}".format(self.resolution)
self.lrelu_slope = LRELU_SLOPE
norm_f = (
weight_norm if cfg.model.mrd.use_spectral_norm == False else spectral_norm
)
if cfg.model.mrd.mrd_override:
print(
"INFO: overriding MRD use_spectral_norm as {}".format(
cfg.model.mrd.mrd_use_spectral_norm
)
)
norm_f = (
weight_norm
if cfg.model.mrd.mrd_use_spectral_norm == False
else spectral_norm
)
self.d_mult = cfg.model.mrd.discriminator_channel_mult_factor
if cfg.model.mrd.mrd_override:
print(
"INFO: overriding mrd channel multiplier as {}".format(
cfg.model.mrd.mrd_channel_mult
)
)
self.d_mult = cfg.model.mrd.mrd_channel_mult
self.convs = nn.ModuleList(
[
norm_f(nn.Conv2d(1, int(32 * self.d_mult), (3, 9), padding=(1, 4))),
norm_f(
nn.Conv2d(
int(32 * self.d_mult),
int(32 * self.d_mult),
(3, 9),
stride=(1, 2),
padding=(1, 4),
)
),
norm_f(
nn.Conv2d(
int(32 * self.d_mult),
int(32 * self.d_mult),
(3, 9),
stride=(1, 2),
padding=(1, 4),
)
),
norm_f(
nn.Conv2d(
int(32 * self.d_mult),
int(32 * self.d_mult),
(3, 9),
stride=(1, 2),
padding=(1, 4),
)
),
norm_f(
nn.Conv2d(
int(32 * self.d_mult),
int(32 * self.d_mult),
(3, 3),
padding=(1, 1),
)
),
]
)
self.conv_post = norm_f(
nn.Conv2d(int(32 * self.d_mult), 1, (3, 3), padding=(1, 1))
)
def forward(self, x):
fmap = []
x = self.spectrogram(x)
x = x.unsqueeze(1)
for l in self.convs:
x = l(x)
x = F.leaky_relu(x, self.lrelu_slope)
fmap.append(x)
x = self.conv_post(x)
fmap.append(x)
x = torch.flatten(x, 1, -1)
return x, fmap
def spectrogram(self, x):
n_fft, hop_length, win_length = self.resolution
x = F.pad(
x,
(int((n_fft - hop_length) / 2), int((n_fft - hop_length) / 2)),
mode="reflect",
)
x = x.squeeze(1)
x = torch.stft(
x,
n_fft=n_fft,
hop_length=hop_length,
win_length=win_length,
center=False,
return_complex=True,
)
x = torch.view_as_real(x) # [B, F, TT, 2]
mag = torch.norm(x, p=2, dim=-1) # [B, F, TT]
return mag
class MultiResolutionDiscriminator(nn.Module):
def __init__(self, cfg, debug=False):
super().__init__()
self.resolutions = cfg.model.mrd.resolutions
assert (
len(self.resolutions) == 3
), "MRD requires list of list with len=3, each element having a list with len=3. got {}".format(
self.resolutions
)
self.discriminators = nn.ModuleList(
[DiscriminatorR(cfg, resolution) for resolution in self.resolutions]
)
def forward(self, y, y_hat):
y_d_rs = []
y_d_gs = []
fmap_rs = []
fmap_gs = []
for i, d in enumerate(self.discriminators):
y_d_r, fmap_r = d(x=y)
y_d_g, fmap_g = d(x=y_hat)
y_d_rs.append(y_d_r)
fmap_rs.append(fmap_r)
y_d_gs.append(y_d_g)
fmap_gs.append(fmap_g)
return y_d_rs, y_d_gs, fmap_rs, fmap_gs