# Make function to find classes in target directory import os import librosa import torch import numpy as np from torchaudio.transforms import Resample SAMPLE_RATE = 44100 AUDIO_LEN = 2.90 # Parameters to control the MelSpec generation N_MELS = 128 F_MIN = 20 F_MAX = 16000 N_FFT = 1024 HOP_LEN = 512 # Make function to find classes in target directory def find_classes(directory: str): # 1. Get the class names by scanning the target directory classes = sorted(entry.name for entry in os.scandir(directory) if entry.is_dir()) # 2. Raise an error if class names not found if not classes: raise FileNotFoundError(f"Couldn't find any classes in {directory}.") # 3. Crearte a dictionary of index labels (computers prefer numerical rather than string labels) class_to_idx = {cls_name: i for i, cls_name in enumerate(classes)} return classes, class_to_idx def resample(wav, sample_rate, new_sample_rate): if wav.shape[0] >= 2: wav = torch.mean(wav, dim=0) else: wav = wav.squeeze(0) if sample_rate > new_sample_rate: resampler = Resample(sample_rate, new_sample_rate) wav = resampler(wav) return wav def mono_to_color(X, eps=1e-6, mean=None, std=None): X = np.stack([X, X, X], axis=-1) # Standardize mean = mean or X.mean() std = std or X.std() X = (X - mean) / (std + eps) # Normalize to [0, 255] _min, _max = X.min(), X.max() if (_max - _min) > eps: V = np.clip(X, _min, _max) V = 255 * (V - _min) / (_max - _min) V = V.astype(np.uint8) else: V = np.zeros_like(X, dtype=np.uint8) return V def normalize(image, mean=None, std=None): image = image / 255.0 if mean is not None and std is not None: image = (image - mean) / std return np.moveaxis(image, 2, 0).astype(np.float32) def compute_melspec(wav, sample_rate=SAMPLE_RATE): melspec = librosa.feature.melspectrogram( y=wav, sr=sample_rate, n_fft=N_FFT, fmin=F_MIN, fmax=F_MAX, n_mels=N_MELS, hop_length=HOP_LEN ) melspec = librosa.power_to_db(melspec).astype(np.float32) return melspec def audio_preprocess(wav, sample_rate): wav = wav.numpy() melspec = compute_melspec(wav, sample_rate) image = mono_to_color(melspec) image = normalize(image, mean=None, std=None) image = torch.from_numpy(image) return image