JJ / FeaturesExtractor.py
Nikhil0987's picture
J
90501fb
raw
history blame
2.29 kB
import numpy as np
from sklearn import preprocessing
from scipy.io.wavfile import read
from python_speech_features import mfcc
from python_speech_features import delta
class FeaturesExtractor:
    """Turn a WAV file into a matrix of normalized MFCC-based voice features."""

    def __init__(self):
        """Create an extractor; it keeps no state between calls."""

    def extract_features(self, audio_path):
        """
        Extract voice features from a WAV file.

        The Mel Frequency Cepstral Coefficients (MFCC) are computed with the
        python_speech_features module, standardized per coefficient (zero mean
        and unit variance across frames, via ``sklearn.preprocessing.scale``)
        and then stacked side by side with their deltas and double deltas.

        Args:
            audio_path (str) : path to wave file without silent moments.

        Returns:
            (array) : Extracted features matrix with one row per frame and
                      15 columns (5 MFCC + 5 deltas + 5 double deltas).
        """
        rate, audio = read(audio_path)

        # scipy returns a (n_samples, n_channels) array for multi-channel
        # files, while mfcc() expects a 1-D signal; average the channels so
        # they are not processed as one interleaved mono stream.
        if audio.ndim > 1:
            audio = audio.mean(axis=1)

        # Length of the analysis window in seconds (library default: 0.025).
        window_length = 0.05

        # The FFT must cover a whole frame, otherwise python_speech_features
        # truncates every frame to nfft samples (e.g. 800 -> 512 at 16 kHz).
        # Start from the previous fixed size of 512 and grow by powers of two,
        # so files with frames of at most 512 samples give unchanged results.
        frame_samples = int(np.ceil(window_length * rate))
        nfft = 512
        while nfft < frame_samples:
            nfft *= 2

        mfcc_feature = mfcc(
            audio,                 # signal to compute the features from
            rate,                  # sample rate of that signal
            winlen=window_length,
            winstep=0.01,          # step between successive windows (s)
            numcep=5,              # cepstral coefficients kept (default 13)
            nfilt=30,              # filters in the filterbank (default 26)
            nfft=nfft,
            # Replace the zeroth cepstral coefficient with the log of the
            # total frame energy.
            appendEnergy=True,
        )

        # Standardize every coefficient column over all frames.
        mfcc_feature = preprocessing.scale(mfcc_feature)

        # First- and second-order time derivatives, each estimated from the
        # 2 preceding and 2 following frames.
        deltas = delta(mfcc_feature, 2)
        double_deltas = delta(deltas, 2)

        combined = np.hstack((mfcc_feature, deltas, double_deltas))
        return combined