Spaces:
Runtime error
Runtime error
import librosa | |
import math, numpy as np | |
import numpy as np | |
import os, sys | |
THIS_DIR = os.path.dirname(os.path.abspath(__file__)) | |
ROOT_DIR = os.path.abspath(os.path.join(THIS_DIR, os.pardir)) | |
DATA_DIR = os.path.join(ROOT_DIR, 'data') | |
sys.path.append(ROOT_DIR) | |
import feature_extraction.madmom as madmom | |
from feature_extraction.madmom.audio.cepstrogram import MFCC | |
def extract_features_spectral_flux(music_file, tgt_fps=20):
    """Compute the spectral-flux onset feature of an audio file with madmom.

    Args:
        music_file: Audio input, passed unchanged to madmom's ``Spectrogram``.
        tgt_fps: Frame rate (frames per second) requested for the spectrogram.

    Returns:
        The spectral flux with an extra axis inserted at position 1
        (presumably shape ``(n_frames, 1)`` -- TODO confirm against madmom).
    """
    filtbank = madmom.audio.filters.MelFilterbank
    spec = madmom.audio.spectrogram.Spectrogram(
        music_file, fps=tgt_fps, filterbank=filtbank, num_channels=1)
    spectralflux = madmom.features.onsets.spectral_flux(spec)
    # np.expand_dims takes ``axis``; the previous ``dim=1`` keyword raised
    # TypeError on every call.
    return np.expand_dims(spectralflux, axis=1)
def extract_features_madmombeat(music_file, tgt_fps=20):
    """Run madmom's RNN downbeat processor on an audio file.

    Args:
        music_file: Audio input, passed unchanged to the processor.
        tgt_fps: Frame rate (frames per second) forwarded to the processor.

    Returns:
        Whatever the ``RNNDownBeatProcessor`` instance returns for the file
        (presumably per-frame beat/downbeat activations -- TODO confirm).
    """
    proc_dwn = madmom.features.RNNDownBeatProcessor()
    # Honour the caller's frame rate. It used to be hard-coded to 20, which
    # silently ignored ``tgt_fps``; the default keeps the old behaviour.
    beats = proc_dwn(music_file, fps=tgt_fps)
    return beats
def extract_features_multi_mel(y, sr=44100.0, hop=512, nffts=[1024, 2048, 4096], mel_dim=100):
    """Stack dB-scaled mel spectrograms computed at several FFT sizes.

    Args:
        y: Audio signal handed to ``librosa.feature.melspectrogram``.
        sr: Sample rate of ``y``.
        hop: Hop length shared by every resolution.
        nffts: FFT window sizes, one spectrogram per entry. The default list
            is only read, never mutated.
        mel_dim: Number of mel bands per spectrogram.

    Returns:
        The per-resolution spectrograms stacked along a new axis 1
        (for mono input presumably ``(mel_dim, len(nffts), n_frames)`` --
        TODO confirm).
    """
    def _mel_db(n_fft):
        # One mel power spectrogram, converted to dB relative to its own peak.
        power = librosa.feature.melspectrogram(
            y=y, sr=sr, n_mels=mel_dim, n_fft=n_fft, hop_length=hop)
        return librosa.power_to_db(power, ref=np.max)

    return np.stack([_mel_db(n_fft) for n_fft in nffts], axis=1)
def extract_features_hybrid(y, sr, hop, mel_dim=12, window_mult=1):
    """Concatenate low-frequency mel bands of the percussive part with
    chroma of the harmonic part.

    Args:
        y: Audio signal.
        sr: Sample rate of ``y``.
        hop: Requested hop length; rounded down to a multiple of 32.
        mel_dim: Number of mel bands computed below 65.4 Hz.
        window_mult: FFT window size as a multiple of the rounded hop.

    Returns:
        Mel rows followed by the 12 chroma rows, joined along axis 0.
    """
    # Chroma CQT only accepts hop lengths that are multiples of 32, so round
    # the requested hop down to the nearest one.
    hop_length = hop - hop % 32
    # FFT window size is a multiple (default 1) of the hop.
    n_fft = window_mult * hop_length

    harmonic, percussive = librosa.effects.hpss(y)

    # 65.4 Hz is C2: the mel bands stop there and the chroma CQT starts there.
    low_mel = librosa.feature.melspectrogram(
        y=percussive, sr=sr, n_fft=n_fft, hop_length=hop_length,
        n_mels=mel_dim, fmax=65.4)
    chroma = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=hop_length,
        norm=np.inf, threshold=0, n_chroma=12,
        n_octaves=6, fmin=65.4, cqt_mode='full')

    return np.concatenate((low_mel, chroma), axis=0)
def extract_features_mel(y, sr, hop, mel_dim=100):
    """Return a mel spectrogram of ``y`` in dB relative to its maximum.

    Args:
        y: Audio signal.
        sr: Sample rate of ``y``.
        hop: Hop length between frames.
        mel_dim: Number of mel bands.

    Returns:
        The output of ``librosa.power_to_db`` applied to the mel power
        spectrogram, with ``ref=np.max``.
    """
    power_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=mel_dim, hop_length=hop)
    db_spec = librosa.power_to_db(power_spec, ref=np.max)
    return db_spec
def extract_features_envelope(y, sr, hop, mel_dim=100):
    """Compute the onset-strength envelope of an audio signal.

    Args:
        y: Audio signal.
        sr: Sample rate of ``y``.
        hop: Hop length between frames.
        mel_dim: Number of mel bands, forwarded as ``n_mels``.

    Returns:
        The onset envelope with an extra axis inserted at position 1
        (presumably shape ``(n_frames, 1)`` for mono input -- TODO confirm).
    """
    # Pass ``sr`` through: it was previously dropped, so librosa fell back to
    # its own default sample rate regardless of the caller's value.
    # NOTE(review): this changes the features when the caller's sr differs
    # from librosa's default; re-extract any cached features.
    envelope = librosa.onset.onset_strength(
        y=y, sr=sr, hop_length=hop, n_mels=mel_dim)
    return np.expand_dims(envelope, 1)
def extract_features_chroma(y, sr, state_times):
    """Compute CQT chroma and aggregate it between the given state times.

    Args:
        y: Audio signal.
        sr: Sample rate of ``y``.
        state_times: Times converted to frame indices with
            ``librosa.core.time_to_frames``; they delimit the aggregation
            segments.

    Returns:
        The chromagram synchronised to the state frames, using the median of
        each chroma bin within a segment.
    """
    cqt_chroma = librosa.feature.chroma_cqt(
        y=y, sr=sr, C=None, fmin=None,
        norm=np.inf, threshold=0.0, tuning=None, n_chroma=12,
        n_octaves=7, window=None, bins_per_octave=None, cqt_mode='full')

    # Frame conversion relies on time_to_frames' default hop length of 512.
    # TODO: CHANGE THIS TO BECOME LIKE HYBRID IF WE ARE TO EVER USE THIS
    boundaries = librosa.core.time_to_frames(state_times, sr=sr)

    # Aggregate chroma features between state frames using the median value.
    return librosa.util.sync(
        cqt_chroma, boundaries, aggregate=np.median, pad=True, axis=-1)
def extract_features_mfcc(y, sr, state_times):
    """Compute MFCCs and aggregate them between the given state times.

    Args:
        y: Audio signal.
        sr: Sample rate of ``y``.
        state_times: Times converted to frame indices with
            ``librosa.core.time_to_frames``; they delimit the aggregation
            segments.

    Returns:
        The MFCC matrix synchronised to the state frames, using the median of
        each coefficient within a segment.
    """
    # librosa defaults are used here; further MFCC parameters could be added.
    coefficients = librosa.feature.mfcc(y=y, sr=sr)
    boundaries = librosa.core.time_to_frames(state_times, sr=sr)
    return librosa.util.sync(
        coefficients, boundaries, aggregate=np.median, pad=True, axis=-1)