# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.nn.utils import weight_norm

# This code is adopted from MelGAN under the MIT License
# https://github.com/descriptinc/melgan-neurips


def weights_init(m):
    # Initialize Conv and BatchNorm weights as in the original MelGAN implementation.
    classname = m.__class__.__name__
    if classname.find("Conv") != -1:
        m.weight.data.normal_(0.0, 0.02)
    elif classname.find("BatchNorm2d") != -1:
        m.weight.data.normal_(1.0, 0.02)
        m.bias.data.fill_(0)


def WNConv1d(*args, **kwargs):
    return weight_norm(nn.Conv1d(*args, **kwargs))


def WNConvTranspose1d(*args, **kwargs):
    return weight_norm(nn.ConvTranspose1d(*args, **kwargs))


class ResnetBlock(nn.Module):
    def __init__(self, dim, dilation=1):
        super().__init__()
        # Dilated residual block; reflection padding keeps the sequence length unchanged.
        self.block = nn.Sequential(
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(dilation),
            WNConv1d(dim, dim, kernel_size=3, dilation=dilation),
            nn.LeakyReLU(0.2),
            WNConv1d(dim, dim, kernel_size=1),
        )
        self.shortcut = WNConv1d(dim, dim, kernel_size=1)

    def forward(self, x):
        return self.shortcut(x) + self.block(x)


class MelGAN(nn.Module):
    def __init__(self, cfg):
        super().__init__()
        self.cfg = cfg

        # Total upsampling factor from mel frames to waveform samples.
        self.hop_length = np.prod(self.cfg.model.melgan.ratios)
        mult = int(2 ** len(self.cfg.model.melgan.ratios))

        # Initial projection from mel channels to the widest feature map.
        model = [
            nn.ReflectionPad1d(3),
            WNConv1d(
                self.cfg.preprocess.n_mel,
                mult * self.cfg.model.melgan.ngf,
                kernel_size=7,
                padding=0,
            ),
        ]

        # Upsample to raw audio scale
        for i, r in enumerate(self.cfg.model.melgan.ratios):
            model += [
                nn.LeakyReLU(0.2),
                WNConvTranspose1d(
                    mult * self.cfg.model.melgan.ngf,
                    mult * self.cfg.model.melgan.ngf // 2,
                    kernel_size=r * 2,
                    stride=r,
                    padding=r // 2 + r % 2,
                    output_padding=r % 2,
                ),
            ]

            # Dilated residual blocks after each upsampling stage.
            for j in range(self.cfg.model.melgan.n_residual_layers):
                model += [
                    ResnetBlock(mult * self.cfg.model.melgan.ngf // 2, dilation=3**j)
                ]

            # Halve the channel count for the next stage.
            mult //= 2

        # Final projection to a single-channel waveform in [-1, 1].
        model += [
            nn.LeakyReLU(0.2),
            nn.ReflectionPad1d(3),
            WNConv1d(self.cfg.model.melgan.ngf, 1, kernel_size=7, padding=0),
            nn.Tanh(),
        ]

        self.model = nn.Sequential(*model)
        self.apply(weights_init)

    def forward(self, x):
        return self.model(x)
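

# A minimal usage sketch (an illustration, not part of the original module): the
# generator maps a mel spectrogram of shape (batch, n_mel, frames) to a waveform of
# shape (batch, 1, frames * hop_length), where hop_length = prod(ratios). The config
# values below (n_mel=80, ngf=32, ratios=[8, 8, 2, 2], n_residual_layers=3) are
# assumed for demonstration and are not necessarily the project's defaults.
if __name__ == "__main__":
    from types import SimpleNamespace

    cfg = SimpleNamespace(
        preprocess=SimpleNamespace(n_mel=80),
        model=SimpleNamespace(
            melgan=SimpleNamespace(ngf=32, ratios=[8, 8, 2, 2], n_residual_layers=3)
        ),
    )
    generator = MelGAN(cfg)
    mel = torch.randn(1, 80, 100)  # (batch, n_mel, frames)
    with torch.no_grad():
        audio = generator(mel)
    print(audio.shape)  # torch.Size([1, 1, 25600]) since hop_length = 8 * 8 * 2 * 2 = 256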