import numpy as np
import soundfile
import librosa

def extract_feature(file_name, **kwargs):
    """Extract the requested audio features from a sound file and return
    them concatenated into a single 1-D NumPy feature vector.

    Supported keyword flags: mfcc, chroma, mel, contrast, tonnetz.
    The clip is assumed to be mono.
    """
    chroma = kwargs.get("chroma")
    contrast = kwargs.get("contrast")
    mfcc = kwargs.get("mfcc")
    mel = kwargs.get("mel")
    tonnetz = kwargs.get("tonnetz")
    
    with soundfile.SoundFile(file_name) as audio_clip:
        X = audio_clip.read(dtype="float32")
        # Short-time Fourier transform (STFT) magnitude of the clip, reused
        # by the chroma and spectral-contrast features below.
        sound_fourier = np.abs(librosa.stft(X))
        result = np.array([])
        
        if mfcc:
            # Mel-frequency cepstral coefficients, averaged over time.
            mfccs = np.mean(librosa.feature.mfcc(y=X, sr=audio_clip.samplerate, n_mfcc=40).T, axis=0)
            result = np.hstack((result, mfccs))
        if chroma:
            # Chromagram (pitch-class energy) computed from the STFT magnitude.
            chroma = np.mean(librosa.feature.chroma_stft(S=sound_fourier, sr=audio_clip.samplerate).T, axis=0)
            result = np.hstack((result, chroma))
        if mel:
            # Mel-scaled spectrogram; librosa >= 0.10 requires the keyword y=.
            mel = np.mean(librosa.feature.melspectrogram(y=X, sr=audio_clip.samplerate).T, axis=0)
            result = np.hstack((result, mel))
        if contrast:
            # Spectral contrast computed from the STFT magnitude.
            contrast = np.mean(librosa.feature.spectral_contrast(S=sound_fourier, sr=audio_clip.samplerate).T, axis=0)
            result = np.hstack((result, contrast))
        if tonnetz:
            # Tonnetz (tonal centroid) features from the harmonic component.
            tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=audio_clip.samplerate).T, axis=0)
            result = np.hstack((result, tonnetz))
    return result
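

# A minimal usage sketch, assuming a hypothetical mono WAV file at
# "example.wav"; the path and the chosen feature flags are illustrative
# and not part of the original file.
if __name__ == "__main__":
    features = extract_feature("example.wav", mfcc=True, chroma=True, mel=True)
    # With default settings: 40 MFCCs + 12 chroma bins + 128 mel bands = 180 values.
    print(features.shape)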