import argparse
import os
import os.path as P
from copy import deepcopy
from functools import partial
from glob import glob
from multiprocessing import Pool
from pathlib import Path

import librosa
import numpy as np
import torchvision


class MelSpectrogram:
    """Turn a waveform into a mel spectrogram, or approximately back again.

    Forward mode (``inverse=False``) computes ``|STFT(x)| ** spec_power`` and
    projects it onto a mel filterbank that is built once in the constructor.
    Inverse mode (``inverse=True``) maps a mel spectrogram back to an STFT
    magnitude with ``librosa.feature.inverse.mel_to_stft`` and reconstructs
    audio with Griffin-Lim.

    Args:
        sr: Sampling rate passed to the mel filterbank / inversion.
        nfft: FFT size.
        fmin: Lowest frequency of the mel filterbank.
        fmax: Highest frequency of the mel filterbank.
        nmels: Number of mel bands.
        hoplen: Hop length used by the STFT and by Griffin-Lim.
        spec_power: Exponent applied to the STFT magnitude.
        inverse: When true, ``__call__`` runs the mel -> waveform direction.
    """

    def __init__(self, sr, nfft, fmin, fmax, nmels, hoplen, spec_power, inverse=False):
        self.sr = sr
        self.nfft = nfft
        self.fmin = fmin
        self.fmax = fmax
        self.nmels = nmels
        self.hoplen = hoplen
        self.spec_power = spec_power
        self.inverse = inverse

        # Built eagerly so that every forward call reuses the same filterbank.
        self.mel_basis = librosa.filters.mel(sr=sr, n_fft=nfft, fmin=fmin, fmax=fmax, n_mels=nmels)

    def __call__(self, x):
        """Apply the forward or inverse transform to ``x`` depending on ``self.inverse``."""
        if not self.inverse:
            magnitude = np.abs(librosa.stft(x, n_fft=self.nfft, hop_length=self.hoplen))
            powered = magnitude ** self.spec_power
            return np.dot(self.mel_basis, powered)

        # mel -> linear-frequency magnitude, then phase recovery via Griffin-Lim.
        stft_magnitude = librosa.feature.inverse.mel_to_stft(
            x,
            sr=self.sr,
            n_fft=self.nfft,
            fmin=self.fmin,
            fmax=self.fmax,
            power=self.spec_power,
        )
        return librosa.griffinlim(stft_magnitude, hop_length=self.hoplen)


class LowerThresh:
    """Raise every element to at least ``min_val``; the inverse is the identity."""

    def __init__(self, min_val, inverse=False):
        self.min_val = min_val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x`` floored element-wise at ``min_val`` (unchanged in inverse mode)."""
        return x if self.inverse else np.maximum(self.min_val, x)


class Add:
    """Add ``val`` to the input; in inverse mode subtract it instead."""

    def __init__(self, val, inverse=False):
        self.inverse = inverse
        self.val = val

    def __call__(self, x):
        """Return ``x + val``, or ``x - val`` in inverse mode."""
        if self.inverse:
            return x - self.val
        return x + self.val


class Subtract(Add):
    """Subtract ``val`` from the input; in inverse mode add it back."""

    def __init__(self, val, inverse=False):
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x - val``, or ``x + val`` in inverse mode."""
        if self.inverse:
            return x + self.val
        return x - self.val


class Multiply:
    """Multiply the input by ``val``; in inverse mode divide by it instead."""

    def __init__(self, val, inverse=False) -> None:
        self.val = val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x * val``, or ``x / val`` in inverse mode."""
        if self.inverse:
            return x / self.val
        return x * self.val


class Divide(Multiply):
    """Divide the input by ``val``; in inverse mode multiply by it instead."""

    def __init__(self, val, inverse=False):
        super().__init__(val, inverse=inverse)

    def __call__(self, x):
        """Return ``x / val``, or ``x * val`` in inverse mode."""
        if self.inverse:
            return x * self.val
        return x / self.val


class Log10:
    """Base-10 logarithm; in inverse mode raise 10 to the input."""

    def __init__(self, inverse=False):
        self.inverse = inverse

    def __call__(self, x):
        """Return ``log10(x)``, or ``10 ** x`` in inverse mode."""
        if self.inverse:
            return 10 ** x
        return np.log10(x)


class Clip:
    """Clamp values to ``[min_val, max_val]``; the inverse is the identity."""

    def __init__(self, min_val, max_val, inverse=False):
        self.min_val = min_val
        self.max_val = max_val
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x`` clipped to the configured range (unchanged in inverse mode)."""
        return x if self.inverse else np.clip(x, self.min_val, self.max_val)


class TrimSpec:
    """Keep at most ``max_len`` entries along the second axis; the inverse is the identity."""

    def __init__(self, max_len, inverse=False):
        self.max_len = max_len
        self.inverse = inverse

    def __call__(self, x):
        """Return ``x[:, :max_len]`` (``x`` unchanged in inverse mode)."""
        return x if self.inverse else x[:, :self.max_len]


class MaxNorm:
    """Divide the input by its global maximum; the inverse is the identity."""

    def __init__(self, inverse=False):
        self.inverse = inverse
        # Small constant added to the maximum so the division never hits zero.
        self.eps = 1e-10

    def __call__(self, x):
        """Return ``x / (x.max() + eps)`` (``x`` unchanged in inverse mode)."""
        if self.inverse:
            return x
        peak = x.max() + self.eps
        return x / peak


# Forward pipeline for 16 kHz audio: mel spectrogram, floor at 1e-5, log10,
# then (x * 20 - 20 + 100) / 100 and a final clip to [0, 1].
TRANSFORMS_16000 = torchvision.transforms.Compose([
    MelSpectrogram(sr=16000, nfft=1024, fmin=125, fmax=7600, nmels=80, hoplen=1024//4, spec_power=1),
    LowerThresh(1e-5),
    Log10(),
    Multiply(20),
    Subtract(20),
    Add(100),
    Divide(100),
    Clip(0, 1.0)
    # TrimSpec(860)  (currently disabled)
])