File size: 3,585 Bytes
2d5fdd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import librosa
import math, numpy as np
import numpy as np
import os, sys

# Absolute path of the directory that contains this file.
THIS_DIR = os.path.dirname(os.path.abspath(__file__))
# Parent directory of THIS_DIR, normalised to an absolute path.
ROOT_DIR = os.path.abspath(os.path.join(THIS_DIR, os.pardir))
# The 'data' directory directly under ROOT_DIR.
DATA_DIR = os.path.join(ROOT_DIR, 'data')
# Put ROOT_DIR on the module search path; presumably this is what lets the
# `feature_extraction.*` imports further down resolve -- TODO confirm.
sys.path.append(ROOT_DIR)

import feature_extraction.madmom as madmom
from feature_extraction.madmom.audio.cepstrogram import MFCC

def extract_features_spectral_flux(music_file, tgt_fps=20):
    """Compute the spectral-flux onset feature of an audio input with madmom.

    Args:
        music_file: Audio input handed straight to
            ``madmom.audio.spectrogram.Spectrogram`` (presumably a file
            path -- confirm against the bundled madmom).
        tgt_fps: Frame rate, in frames per second, requested for the
            spectrogram.

    Returns:
        The spectral flux with a singleton axis inserted at position 1,
        i.e. ``(n_frames, 1)`` if madmom returns a 1-D flux (expected, but
        not verifiable from this file).
    """
    mel_filterbank = madmom.audio.filters.MelFilterbank
    spec = madmom.audio.spectrogram.Spectrogram(
        music_file, fps=tgt_fps, filterbank=mel_filterbank, num_channels=1)
    spectral_flux = madmom.features.onsets.spectral_flux(spec)
    # np.expand_dims takes `axis`; the former `dim=1` keyword raised TypeError.
    return np.expand_dims(spectral_flux, axis=1)

def extract_features_madmombeat(music_file, tgt_fps=20):
    """Run madmom's RNN downbeat processor on an audio input.

    Args:
        music_file: Audio input passed to the processor (presumably a file
            path -- confirm against the bundled madmom).
        tgt_fps: Frame rate, in frames per second, forwarded to the
            processor as ``fps``. Defaults to 20.

    Returns:
        Whatever ``RNNDownBeatProcessor`` returns for the input (presumably
        per-frame beat/downbeat activations -- confirm).
    """
    proc_dwn = madmom.features.RNNDownBeatProcessor()
    # Honour the caller's frame rate: it used to be hard-coded to 20, which
    # silently ignored `tgt_fps`. The default keeps existing calls unchanged.
    return proc_dwn(music_file, fps=tgt_fps)

def extract_features_multi_mel(y, sr=44100.0, hop=512, nffts=(1024, 2048, 4096), mel_dim=100):
    """Stack dB-scaled mel spectrograms computed with several FFT sizes.

    Args:
        y: Audio time series passed to ``librosa.feature.melspectrogram``.
        sr: Sampling rate of ``y``.
        hop: Hop length in samples, shared by every FFT size so the
            spectrograms can be stacked.
        nffts: Iterable of FFT window sizes; one spectrogram is produced per
            entry. The default is an immutable tuple so it cannot be
            modified between calls.
        mel_dim: Number of mel bands.

    Returns:
        The per-FFT-size spectrograms stacked along a new axis 1, i.e.
        ``(mel_dim, len(nffts), n_frames)`` when each spectrogram is 2-D
        (mono input assumed).

    Raises:
        ValueError: If ``nffts`` is empty (raised by ``np.stack``).
    """
    per_fft_db = [
        # Each spectrogram is referenced to its own peak power.
        librosa.power_to_db(
            librosa.feature.melspectrogram(
                y=y, sr=sr, n_mels=mel_dim, n_fft=nfft, hop_length=hop),
            ref=np.max)
        for nfft in nffts
    ]
    return np.stack(per_fft_db, axis=1)

def extract_features_hybrid(y, sr, hop, mel_dim=12, window_mult=1):
    """Build joint percussive-mel / harmonic-chroma features.

    The signal is split with harmonic-percussive source separation. A mel
    spectrogram (capped at 65.4 Hz) is taken from the percussive part and a
    chroma CQT (starting at 65.4 Hz, i.e. C2) from the harmonic part; the two
    are concatenated along axis 0.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.
        hop: Requested hop length in samples. It is rounded down to the
            nearest multiple of 32 before use.
        mel_dim: Number of mel bands for the percussive spectrogram.
        window_mult: FFT window size expressed as a multiple of the
            (rounded) hop length.

    Returns:
        Mel rows followed by the 12 chroma rows, concatenated on axis 0
        (``(mel_dim + 12, n_frames)`` for mono input -- assumed).

    Raises:
        ValueError: If ``hop`` rounds down to a non-positive value
            (i.e. ``hop`` < 32).
    """
    # Chroma CQT only accepts hop lengths that are multiples of 32, so round
    # the requested hop down to satisfy that condition.
    hop -= hop % 32
    if hop <= 0:
        # Fail early with a clear message instead of passing a zero hop (and
        # zero-sized FFT window) down into librosa.
        raise ValueError(
            "hop must be at least 32 samples; it is rounded down to a "
            "multiple of 32 and the result must be positive")
    # FFT window size is a multiple (default 1) of the hop.
    window = window_mult * hop
    y_harm, y_perc = librosa.effects.hpss(y)
    mels = librosa.feature.melspectrogram(
        y=y_perc, sr=sr, n_fft=window, hop_length=hop, n_mels=mel_dim,
        fmax=65.4)  # C2 is 65.4 Hz
    cqts = librosa.feature.chroma_cqt(
        y=y_harm, sr=sr, hop_length=hop, norm=np.inf, threshold=0,
        n_chroma=12, n_octaves=6, fmin=65.4, cqt_mode='full')
    return np.concatenate((mels, cqts), axis=0)


def extract_features_mel(y, sr, hop,mel_dim=100):
    """Return the dB-scaled mel spectrogram of ``y``.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.
        hop: Hop length in samples between successive frames.
        mel_dim: Number of mel bands.

    Returns:
        The mel power spectrogram converted to decibels relative to its own
        maximum, so values are at most 0 dB.
    """
    power_spec = librosa.feature.melspectrogram(
        y=y, sr=sr, n_mels=mel_dim, hop_length=hop)
    return librosa.power_to_db(power_spec, ref=np.max)

def extract_features_envelope(y, sr, hop, mel_dim=100):
    """Compute the onset-strength envelope of ``y`` as a column feature.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.
        hop: Hop length in samples between successive frames.
        mel_dim: Number of mel bands, forwarded as ``n_mels``.

    Returns:
        The onset-strength envelope with a singleton axis inserted at
        position 1 (``(n_frames, 1)`` for a 1-D envelope).
    """
    # Forward `sr`: it was previously dropped, so librosa fell back to its
    # own default sample rate regardless of the real rate of `y`.
    envelope = librosa.onset.onset_strength(
        y=y, sr=sr, hop_length=hop, n_mels=mel_dim)
    return np.expand_dims(envelope, 1)

def extract_features_chroma(y,sr, state_times):
    """Return chroma-CQT features aggregated between the given state times.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.
        state_times: Times (in seconds) marking the segment boundaries.

    Returns:
        The chromagram with its frames median-aggregated between consecutive
        boundaries (12 chroma rows; one column per segment -- assumed).
    """
    # TODO: CHANGE THIS TO BECOME LIKE HYBRID IF WE ARE TO EVER USE THIS
    chroma = librosa.feature.chroma_cqt(
        y=y, sr=sr, C=None, fmin=None, norm=np.inf, threshold=0.0,
        tuning=None, n_chroma=12, n_octaves=7, window=None,
        bins_per_octave=None, cqt_mode='full')
    # Convert boundary times to frame indices; no hop length is passed, so
    # librosa's default applies here as it does for the chromagram above.
    boundary_frames = librosa.core.time_to_frames(state_times, sr=sr)
    # Collapse each segment between boundaries to its per-bin median.
    return librosa.util.sync(
        chroma, boundary_frames, aggregate=np.median, pad=True, axis=-1)

def extract_features_mfcc(y,sr,state_times):
    """Return MFCC features aggregated between the given state times.

    Args:
        y: Audio time series.
        sr: Sampling rate of ``y``.
        state_times: Times (in seconds) marking the segment boundaries.

    Returns:
        The MFCC matrix with its frames median-aggregated between
        consecutive boundaries.
    """
    # librosa defaults are used for every MFCC parameter other than y and sr.
    coeffs = librosa.feature.mfcc(y=y, sr=sr)
    boundary_frames = librosa.core.time_to_frames(state_times, sr=sr)
    # Collapse each segment between boundaries to its per-coefficient median.
    return librosa.util.sync(
        coeffs, boundary_frames, aggregate=np.median, pad=True, axis=-1)