Spaces:

jonathanjordan21
/

tts-rvc-autopst

Runtime error

File size: 3,300 Bytes

7ce5feb

import copy
import torch
import numpy as np
from scipy import signal
from librosa.filters import mel
from scipy.signal import get_window
import torch
import torch.nn as nn
import torch.nn.functional as F


def butter_highpass(cutoff, fs, order=5):
    """Design a digital Butterworth high-pass filter.

    Args:
        cutoff: Cutoff frequency, expressed in the same units as ``fs``.
        fs: Sampling rate of the signal that will be filtered.
        order: Filter order forwarded to ``scipy.signal.butter``.

    Returns:
        The ``(b, a)`` numerator and denominator coefficients.
    """
    # scipy takes the critical frequency as a fraction of Nyquist (fs / 2).
    nyquist = 0.5 * fs
    numerator, denominator = signal.butter(
        order, cutoff / nyquist, btype='high', analog=False)
    return numerator, denominator
    
    
    
def pySTFT(x, fft_length=1024, hop_length=256):
    """Compute a magnitude short-time Fourier transform with a Hann window.

    The input is reflect-padded by ``fft_length // 2`` samples, cut into
    overlapping frames of ``fft_length`` samples taken every ``hop_length``
    samples along the last axis, windowed, and passed through a real FFT.

    Args:
        x: Signal to analyse (the framing below is written for a 1-D array).
        fft_length: Frame length and FFT size.
        hop_length: Number of samples between the starts of adjacent frames.

    Returns:
        Magnitudes of the transposed FFT result; for a 1-D input this has
        shape ``(fft_length // 2 + 1, n_frames)``.
    """
    half_window = int(fft_length // 2)
    padded = np.pad(x, half_window, mode='reflect')

    overlap = fft_length - hop_length
    num_frames = (padded.shape[-1] - overlap) // hop_length
    sample_stride = padded.strides[-1]

    # Zero-copy framing: consecutive rows start ``hop_length`` samples apart
    # and each row spans ``fft_length`` contiguous samples.
    frames = np.lib.stride_tricks.as_strided(
        padded,
        shape=padded.shape[:-1] + (num_frames, fft_length),
        strides=padded.strides[:-1] + (hop_length * sample_stride,
                                       sample_stride),
    )

    window = get_window('hann', fft_length, fftbins=True)
    spectrum = np.fft.rfft(window * frames, n=fft_length)

    return np.abs(spectrum.T)



class LinearNorm(torch.nn.Module):
    """Linear layer whose weight is re-initialised with Xavier-uniform.

    Args:
        in_dim: Size of each input feature vector.
        out_dim: Size of each output feature vector.
        bias: Whether the wrapped ``torch.nn.Linear`` has a bias term.
        w_init_gain: Nonlinearity name handed to
            ``torch.nn.init.calculate_gain`` to scale the initialisation.
    """

    def __init__(self, in_dim, out_dim, bias=True, w_init_gain='linear'):
        super().__init__()
        self.linear_layer = torch.nn.Linear(in_dim, out_dim, bias=bias)

        # Replace the default weight init with a gain-scaled Xavier-uniform.
        gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(self.linear_layer.weight, gain=gain)

    def forward(self, x):
        """Apply the wrapped linear layer to ``x``."""
        projected = self.linear_layer(x)
        return projected


class ConvNorm(torch.nn.Module):
    """1-D convolution whose weight is re-initialised with Xavier-uniform.

    When ``padding`` is omitted it is derived from the kernel size and
    dilation as ``dilation * (kernel_size - 1) / 2``; this derivation
    requires an odd ``kernel_size``.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Width of the convolution kernel.
        stride: Convolution stride.
        padding: Explicit padding, or ``None`` to derive it as above.
        dilation: Spacing between kernel taps.
        bias: Whether the convolution has a bias term.
        w_init_gain: Nonlinearity name handed to
            ``torch.nn.init.calculate_gain`` to scale the initialisation.
    """

    def __init__(self, in_channels, out_channels, kernel_size=1, stride=1,
                 padding=None, dilation=1, bias=True, w_init_gain='linear'):
        super().__init__()

        if padding is None:
            # The symmetric padding below is only exact for odd kernels.
            assert kernel_size % 2 == 1
            padding = int(dilation * (kernel_size - 1) / 2)

        conv_options = dict(kernel_size=kernel_size, stride=stride,
                            padding=padding, dilation=dilation, bias=bias)
        self.conv = torch.nn.Conv1d(in_channels, out_channels, **conv_options)

        # Replace the default weight init with a gain-scaled Xavier-uniform.
        gain = torch.nn.init.calculate_gain(w_init_gain)
        torch.nn.init.xavier_uniform_(self.conv.weight, gain=gain)

    def forward(self, signal):
        """Run the wrapped convolution over ``signal``."""
        return self.conv(signal)
    
    

def filter_bank_mean(num_rep, codes_mask, max_len_long):
    '''
    Build a mean-pooling filterbank from per-code repetition counts.

    num_rep (B, L): span length assigned to each code
    codes_mask (B, L): multiplied into num_rep and into the result, so
        entries with mask 0 get an all-zero row
    max_len_long: size of the last output axis

    output: filterbank (B, L, max_len_long)

    Row l weights equally (summing to 1) the 1-based positions p with
    floor(left_l) < p <= ceil(right_l), where right_l is the running sum
    of the masked num_rep up to and including l and left_l is the running
    sum before l. A row that covers no position is all zeros.

    zero pad in codes must be real zero
    '''

    mask = codes_mask.unsqueeze(-1)  # (B, L, 1)
    reps = num_rep.unsqueeze(-1) * mask  # (B, L, 1)

    # Span boundaries: each code ends at the running total and starts where
    # the previous code ended (0 for the first code).
    span_end = reps.cumsum(dim=1)
    span_start = torch.cat(
        [torch.zeros_like(span_end[:, :1, :]), span_end[:, :-1, :]], dim=1)
    span_end = span_end.ceil()
    span_start = span_start.floor()

    positions = torch.arange(
        1, max_len_long + 1, device=reps.device).view(1, 1, -1)

    # Signed distance of every position from each span's two edges.
    from_start = positions - span_start
    from_end = (positions - (max_len_long - span_end)).flip(dims=(2,))

    # Positive only where the position lies inside the span.
    inside = F.relu(torch.min(from_start, from_end)).float()

    # Binarise, then divide by the span's position count (mean pooling).
    fb = (inside > 0).float()
    count = fb.sum(dim=-1, keepdim=True)
    count = count.masked_fill(count == 0, 1.0)  # avoid 0/0 on empty rows

    return (fb / count) * mask