Spaces:
Build error
Build error
import numpy as np | |
from sklearn import preprocessing | |
from scipy.io.wavfile import read | |
from python_speech_features import mfcc | |
from python_speech_features import delta | |
class FeaturesExtractor:
    """Turn a WAV file into a per-frame MFCC + delta + double-delta matrix."""

    # Parameters handed to python_speech_features.mfcc.
    _WINLEN = 0.05    # analysis window in seconds (library default: 0.025)
    _WINSTEP = 0.01   # hop between successive windows in seconds
    _NUMCEP = 5       # cepstral coefficients kept per frame (library default: 13)
    _NFILT = 30       # filters in the mel filterbank (library default: 26)
    _MIN_NFFT = 512   # FFT size is never smaller than this
    _DELTA_SPAN = 2   # frames on each side used for the delta estimate

    def __init__(self):
        # The extractor keeps no state; the constructor exists only so that
        # callers can keep instantiating the class without arguments.
        pass

    @staticmethod
    def _to_mono(audio):
        """Return a 1-D signal, averaging the channels of multi-channel audio."""
        audio = np.asarray(audio)
        if audio.ndim > 1:
            # scipy.io.wavfile.read yields (samples, channels) for
            # multi-channel files; mfcc expects a single 1-D signal.
            return audio.mean(axis=1)
        return audio

    @classmethod
    def _fft_size(cls, rate):
        """Return the smallest power-of-two FFT size (>= 512) that holds one window."""
        frame_len = int(np.floor(cls._WINLEN * rate + 0.5))
        nfft = cls._MIN_NFFT
        while nfft < frame_len:
            nfft *= 2
        return nfft

    def extract_features(self, audio_path):
        """
        Extract voice features from a wave file.

        Computes the Mel Frequency Cepstral Coefficients (MFCC) with the
        python_speech_features module, standardises every coefficient to zero
        mean and unit variance over the frames of the file (cepstral mean and
        variance normalisation), and stacks the result with its deltas and
        double deltas.

        Args:
            audio_path (str) : path to wave file without silent moments.
                               Multi-channel files are down-mixed to mono.

        Returns:
            (array) : Extracted features matrix with one row per frame and
                      3 * 5 = 15 columns (MFCC, deltas, double deltas).
        """
        rate, audio = read(audio_path)
        audio = self._to_mono(audio)

        # A fixed nfft of 512 is shorter than the 50 ms window as soon as the
        # sample rate exceeds 10240 Hz, and the library then truncates every
        # frame to 512 samples. Size the FFT from the sample rate instead.
        # NOTE: for such sample rates the features differ from the ones the
        # old fixed nfft produced; models fitted on those must be refitted.
        nfft = self._fft_size(rate)

        mfcc_feature = mfcc(
            audio,
            rate,
            winlen=self._WINLEN,
            winstep=self._WINSTEP,
            numcep=self._NUMCEP,
            nfilt=self._NFILT,
            nfft=nfft,
            # Replace the zeroth cepstral coefficient with the log of the
            # total frame energy.
            appendEnergy=True,
        )

        # Column-wise standardisation (zero mean, unit variance).
        mfcc_feature = preprocessing.scale(mfcc_feature)

        deltas = delta(mfcc_feature, self._DELTA_SPAN)
        double_deltas = delta(deltas, self._DELTA_SPAN)
        return np.hstack((mfcc_feature, deltas, double_deltas))