import torch
import torch.nn as nn
from librosa.filters import mel as librosa_mel_fn


class MelSpectrogram(nn.Module):
    """Compute a mel-scaled magnitude spectrogram from a raw waveform.

    The module holds two buffers: ``mel_basis``, the filterbank returned by
    ``librosa.filters.mel``, and ``window_fn``, a Hann window of
    ``win_length`` samples. Being buffers, both follow the module across
    ``.to(device)`` calls and are stored in its ``state_dict``.

    Args:
        sample_rate: Sampling rate passed to the mel filterbank builder.
        n_fft: FFT size used by the STFT and the filterbank.
        win_length: Length of the Hann analysis window.
        hop_length: Hop between successive STFT frames.
        n_mels: Number of mel bands.
        f_min: Lowest filterbank frequency.
        f_max: Highest filterbank frequency.
        norm: Normalisation mode forwarded to ``librosa.filters.mel``.
        center: Forwarded to ``torch.stft``. ``forward`` always applies its
            own reflect padding first, so ``center=True`` adds the STFT's
            padding on top of it.
    """

    def __init__(self,
                 sample_rate: int = 22050,
                 n_fft: int = 1024,
                 win_length: int = 1024,
                 hop_length: int = 256,
                 n_mels: int = 80,
                 f_min: float = 0,
                 f_max: float = 8000.0,
                 norm: str = 'slaney',
                 center: bool = False
                 ):
        super().__init__()
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.hop_length = hop_length
        self.win_length = win_length
        self.center = center
        # Samples of reflect padding added on each side before the STFT.
        self.pad_length = (n_fft - hop_length) // 2

        mel_filters = librosa_mel_fn(sr=sample_rate,
                                     n_fft=n_fft,
                                     n_mels=n_mels,
                                     fmin=f_min,
                                     fmax=f_max,
                                     norm=norm)
        # Use the default floating dtype so the filterbank matches the dtype
        # of the window created by torch.hann_window below.
        mel_basis = torch.tensor(mel_filters, dtype=torch.get_default_dtype())
        window_fn = torch.hann_window(win_length)

        self.register_buffer('mel_basis', mel_basis)
        self.register_buffer('window_fn', window_fn)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return the mel spectrogram of ``x``.

        Args:
            x: Waveform tensor whose last dimension is time. Any number of
                leading dimensions is accepted (``[T]``, ``[B, T]``,
                ``[B, C, T]``, ...).

        Returns:
            Tensor with the leading dimensions of ``x`` followed by
            ``[mels, frames]``; for a ``[B, T]`` input this is
            ``[B, mels, frames]``.
        """
        lead_shape = x.shape[:-1]

        # Reflect padding of the last dimension is only implemented for
        # 2-D/3-D inputs and torch.stft only takes 1-D/2-D inputs, so fold
        # all leading dimensions into one batch axis: [N, 1, T] for the pad,
        # then [N, T'] for the STFT.
        flat = x.reshape(-1, 1, x.shape[-1])
        padded = nn.functional.pad(
            flat, (self.pad_length, self.pad_length), mode='reflect'
        ).squeeze(1)

        spec_lin = torch.stft(padded,
                              self.n_fft,
                              self.hop_length,
                              self.win_length,
                              self.window_fn,
                              center=self.center,
                              return_complex=True)  # [N, F, frames]

        # sqrt(|X|^2 + 1e-9): the epsilon keeps the magnitude strictly
        # positive. abs() returns a new tensor, so the in-place chain never
        # touches the STFT output itself.
        spec_mag = spec_lin.abs().pow_(2).add_(1e-9).sqrt_()

        spec_mel = torch.matmul(self.mel_basis, spec_mag)  # [N, mels, frames]

        # Restore the caller's leading dimensions.
        return spec_mel.reshape(*lead_shape, *spec_mel.shape[-2:])