myTest01 / feature_extraction /audio_feature_utils.py
meng2003's picture
Upload 357 files
2d5fdd1
import librosa
import math, numpy as np
import numpy as np
import os, sys
# Absolute path of the directory that contains this module.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Parent directory of THIS_DIR (presumably the repository root -- confirm against the project layout).
ROOT_DIR = os.path.abspath(os.path.join(THIS_DIR, os.pardir))
# 'data' directory located directly under ROOT_DIR.
DATA_DIR = os.path.join(ROOT_DIR, 'data')
# Put ROOT_DIR on the import path before the `feature_extraction.*` imports
# that follow, so they resolve regardless of the current working directory.
sys.path.append(ROOT_DIR)
import feature_extraction.madmom as madmom
from feature_extraction.madmom.audio.cepstrogram import MFCC
def extract_features_spectral_flux(music_file, tgt_fps=20):
    """Compute the spectral-flux onset feature of an audio file with madmom.

    Args:
        music_file: Audio input handed unchanged to madmom's ``Spectrogram``.
        tgt_fps: Frame rate (frames per second) requested for the spectrogram.

    Returns:
        The spectral flux returned by madmom with a new axis of length 1
        inserted at position 1 (presumably ``(num_frames, 1)`` -- confirm
        against the shape madmom returns).
    """
    # The filterbank is passed as a class, not as an instance.
    mel_filterbank = madmom.audio.filters.MelFilterbank
    spec = madmom.audio.spectrogram.Spectrogram(
        music_file, fps=tgt_fps, filterbank=mel_filterbank, num_channels=1)
    flux = madmom.features.onsets.spectral_flux(spec)
    # NumPy's keyword is `axis`; the previous `dim=1` raised TypeError on every call.
    return np.expand_dims(flux, axis=1)
def extract_features_madmombeat(music_file, tgt_fps=20):
    """Run madmom's RNN downbeat processor on an audio file.

    Args:
        music_file: Audio input passed unchanged to the processor.
        tgt_fps: Frame rate forwarded to the processor as ``fps``. The
            default of 20 matches the value that used to be hard-coded.

    Returns:
        The processor's output, unmodified (presumably per-frame beat and
        downbeat activations -- confirm against the bundled madmom version).
    """
    downbeat_processor = madmom.features.RNNDownBeatProcessor()
    # Honour the caller's frame rate; previously `fps=20` was hard-coded and
    # `tgt_fps` was silently ignored.
    return downbeat_processor(music_file, fps=tgt_fps)
def extract_features_multi_mel(y, sr=44100.0, hop=512, nffts=(1024, 2048, 4096), mel_dim=100):
    """Compute dB-scaled mel spectrograms at several FFT sizes and stack them.

    Args:
        y: Audio signal passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``y``.
        hop: Hop length in samples, shared by every FFT size.
        nffts: Iterable of FFT window sizes; one spectrogram is computed per
            entry. The default is an immutable tuple so it cannot be mutated
            between calls.
        mel_dim: Number of mel bands.

    Returns:
        The per-FFT-size spectrograms stacked along a new axis 1
        (presumably ``(mel_dim, len(nffts), num_frames)`` -- confirm).

    Raises:
        ValueError: From ``np.stack`` if ``nffts`` is empty.
    """
    # Each spectrogram is converted to dB relative to its own maximum.
    per_resolution = [
        librosa.power_to_db(
            librosa.feature.melspectrogram(
                y=y, sr=sr, n_mels=mel_dim, n_fft=nfft, hop_length=hop),
            ref=np.max,
        )
        for nfft in nffts
    ]
    return np.stack(per_resolution, axis=1)
def extract_features_hybrid(y, sr, hop, mel_dim=12, window_mult=1):
    """Combine a percussive mel spectrogram with a harmonic chroma-CQT.

    The signal is split into harmonic and percussive parts with HPSS. The
    percussive part yields ``mel_dim`` mel bands, the harmonic part yields
    12 chroma bins, and the two are concatenated along axis 0.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        hop: Requested hop length in samples; rounded down to a multiple of 32.
        mel_dim: Number of mel bands for the percussive part.
        window_mult: FFT window size expressed as a multiple of the hop.

    Returns:
        Array with the mel rows first and the chroma rows after them.
    """
    # Chroma CQT only accepts hop lengths that are multiples of 32, so round
    # the requested hop down to the nearest one.
    aligned_hop = hop - hop % 32
    # The FFT window size is a multiple (default 1) of the hop.
    n_fft = window_mult * aligned_hop

    harmonic, percussive = librosa.effects.hpss(y)

    # NOTE(review): fmax=65.4 (C2) caps the mel filterbank at a very low
    # frequency -- confirm this is intended for the percussive branch.
    mel_rows = librosa.feature.melspectrogram(
        y=percussive, sr=sr, n_fft=n_fft, hop_length=aligned_hop,
        n_mels=mel_dim, fmax=65.4)
    # The chroma analysis starts at C2 (65.4 Hz) and spans 6 octaves.
    chroma_rows = librosa.feature.chroma_cqt(
        y=harmonic, sr=sr, hop_length=aligned_hop,
        norm=np.inf, threshold=0, n_chroma=12,
        n_octaves=6, fmin=65.4, cqt_mode='full')

    return np.concatenate((mel_rows, chroma_rows), axis=0)
def extract_features_mel(y, sr, hop, mel_dim=100):
    """Return the mel spectrogram of ``y`` in dB relative to its maximum.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        hop: Hop length in samples.
        mel_dim: Number of mel bands.

    Returns:
        The dB-scaled mel spectrogram produced by ``librosa.power_to_db``.
    """
    power_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=mel_dim, hop_length=hop)
    return librosa.power_to_db(power_spec, ref=np.max)
def extract_features_envelope(y, sr, hop, mel_dim=100):
    """Compute the onset-strength envelope of an audio signal.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        hop: Hop length in samples.
        mel_dim: Number of mel bands, passed to librosa as ``n_mels``.

    Returns:
        The onset-strength envelope with a new axis of length 1 inserted at
        position 1 (presumably ``(num_frames, 1)`` -- confirm).
    """
    # Pass `sr` through: it was previously dropped, so librosa fell back to
    # its own default sampling rate regardless of the actual rate of `y`.
    onset_env = librosa.onset.onset_strength(
        y=y, sr=sr, hop_length=hop, n_mels=mel_dim)
    return np.expand_dims(onset_env, 1)
def extract_features_chroma(y, sr, state_times):
    """Compute chroma-CQT features and take their median between state times.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        state_times: Times (seconds) that delimit the aggregation segments.

    Returns:
        Chroma features synchronised to ``state_times``: the median of each
        chroma bin within each segment, as returned by ``librosa.util.sync``.
    """
    # TODO: CHANGE THIS TO BECOME LIKE HYBRID IF WE ARE TO EVER USE THIS
    # No hop length is given here, so librosa's default is used for both the
    # chromagram and the time-to-frame conversion (512 per the original note).
    chroma = librosa.feature.chroma_cqt(
        y=y, sr=sr, C=None, fmin=None,
        norm=np.inf, threshold=0.0, tuning=None, n_chroma=12,
        n_octaves=7, window=None, bins_per_octave=None, cqt_mode='full')

    # Convert the segment boundaries from seconds to frame indices.
    boundary_frames = librosa.core.time_to_frames(state_times, sr=sr)

    # Aggregate along the last axis, using the median between boundaries.
    return librosa.util.sync(
        chroma, boundary_frames, aggregate=np.median, pad=True, axis=-1)
def extract_features_mfcc(y, sr, state_times):
    """Compute MFCCs and take their median between state times.

    Args:
        y: Audio signal.
        sr: Sampling rate of ``y``.
        state_times: Times (seconds) that delimit the aggregation segments.

    Returns:
        MFCCs synchronised to ``state_times``: the median of each coefficient
        within each segment, as returned by ``librosa.util.sync``.
    """
    # librosa defaults are used for every other MFCC parameter.
    coefficients = librosa.feature.mfcc(y=y, sr=sr)
    # Convert the segment boundaries from seconds to frame indices.
    boundary_frames = librosa.core.time_to_frames(state_times, sr=sr)
    return librosa.util.sync(
        coefficients, boundary_frames, aggregate=np.median, pad=True, axis=-1)