Spaces:
Runtime error
Runtime error
# Copyright (c) 2023 Amphion.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
#################### NSF ####################
import torch | |
import numpy as np | |
import torch.nn as nn | |
import torch.nn.functional as F | |
# This code is adapted from Xin Wang's NSF under the MIT License
# https://github.com/nii-yamagishilab/project-NN-Pytorch-scripts
class SineGen(nn.Module):
    """Generate a bank of sine waves (fundamental + harmonics) from an F0 track.

    Args:
        fs: Value the per-harmonic frequencies are divided by to obtain the
            per-step phase increment (presumably the sampling rate in Hz --
            confirm against the caller).
        harmonic_num: Number of harmonics above the fundamental; the last
            axis of the output has ``harmonic_num + 1`` channels.
        amp: Multiplier applied to the sine waves; ``amp / 3`` is also the
            noise scale wherever the voiced mask is 0.
        noise_std: Noise scale wherever the voiced mask is 1.
        voiced_threshold: ``f0`` values strictly greater than this are
            treated as voiced.
    """

    def __init__(
        self, fs, harmonic_num=0, amp=0.1, noise_std=0.003, voiced_threshold=0
    ):
        super().__init__()
        self.fs = fs
        self.harmonic_num = harmonic_num
        self.dim = harmonic_num + 1
        self.amp = amp
        self.noise_std = noise_std
        # NOTE: the attribute is spelled "voice_threshold" while the
        # constructor argument is "voiced_threshold".
        self.voice_threshold = voiced_threshold

    def _f0toUnvoiced(self, f0):
        """Return a mask shaped like ``f0``: 1 where ``f0`` exceeds the threshold, else 0."""
        voiced = f0 > self.voice_threshold
        return torch.ones_like(f0) * voiced

    @staticmethod
    def _resample_time(x, upp, **interp_kwargs):
        """Rescale axis 1 of a 3-D tensor by ``upp`` using ``F.interpolate``.

        ``F.interpolate`` resamples the trailing axis, so axes 1 and 2 are
        swapped before the call and swapped back afterwards.
        """
        stretched = F.interpolate(
            x.transpose(2, 1), scale_factor=upp, **interp_kwargs
        )
        return stretched.transpose(2, 1)

    def forward(self, f0, upp):
        """Synthesize sine waves, the voiced mask and the additive noise.

        Args:
            f0: Tensor that becomes 3-D after a trailing axis is appended,
                i.e. it is expected to be 2-D. Axis 1 is the one that is
                accumulated and upsampled (presumably ``(batch, frames)`` --
                confirm against the caller).
            upp: Scale factor handed to ``F.interpolate`` for axis 1.

        Returns:
            Tuple ``(sine_waves, uv, noise)``. ``sine_waves`` and ``noise``
            have ``self.dim`` channels on the last axis; ``uv`` has one.
            ``sine_waves`` is float32 when the phase increments are float32
            and float16 for any other dtype.
        """
        f0 = f0.unsqueeze(-1)

        # One channel per harmonic: f0 * 1, f0 * 2, ..., f0 * dim.
        harmonic_idx = torch.arange(1, self.dim + 1, device=f0.device)
        freqs = f0 * harmonic_idx.reshape(1, 1, -1)

        # Per-step phase increment expressed in cycles, kept in [0, 1).
        phase_step = (freqs / self.fs) % 1

        # Random starting phase for every channel except the first one,
        # injected into the first step only.
        init_phase = torch.rand(
            freqs.shape[0], freqs.shape[2], device=freqs.device
        )
        init_phase[:, 0] = 0
        phase_step[:, 0, :] = phase_step[:, 0, :] + init_phase

        # Anything that is not float32 is handled as half precision.
        if phase_step.dtype is torch.float32:
            out_dtype = torch.float32
        else:
            out_dtype = torch.float16

        # Accumulate in double precision, then return to the working dtype.
        phase_acc = torch.cumsum(phase_step.double(), 1).to(out_dtype)
        phase_acc = phase_acc * upp
        phase_acc = self._resample_time(
            phase_acc, upp, mode="linear", align_corners=True
        )
        phase_step = self._resample_time(phase_step, upp, mode="nearest")

        # A negative difference after the modulo marks a wrap past 1; those
        # steps get -1 added to their increment before the final cumsum.
        phase_acc = phase_acc % 1
        wrapped = (phase_acc[:, 1:, :] - phase_acc[:, :-1, :]) < 0
        wrap_shift = torch.zeros_like(phase_step)
        wrap_shift[:, 1:, :] = wrapped * (-1.0)

        total_phase = torch.cumsum(
            phase_step.double() + wrap_shift.double(), dim=1
        )
        sine_waves = torch.sin(total_phase * 2 * np.pi).to(out_dtype)
        sine_waves = sine_waves * self.amp

        # Voiced mask, upsampled along the same axis as the sines.
        uv = self._resample_time(self._f0toUnvoiced(f0), upp, mode="nearest")

        # Noise scale: noise_std where uv is 1, amp / 3 where uv is 0.
        noise_amp = uv * self.noise_std + (1 - uv) * self.amp / 3
        noise = noise_amp * torch.randn_like(sine_waves)

        # Sines are zeroed where uv is 0, leaving only the noise there.
        sine_waves = sine_waves * uv + noise
        return sine_waves, uv, noise