ML-with-Rajibul's picture
Update SER.py
9fa244e verified
raw
history blame
No virus
3.61 kB
import pandas as pd
import numpy as np
import librosa
import sklearn
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
import tensorflow as tf
from keras.models import load_model
import pickle
# Module-level audio sample rate (presumably in Hz, matching librosa.load's
# default resampling rate -- confirm).
# NOTE(review): extract_process and export_process each bind their own local
# `sample_rate`, so no function visible in this file reads this global.
sample_rate = 22050
def noise(data):
    """Return a noisy copy of ``data``; the input array is left untouched.

    One ``np.random.uniform()`` draw, multiplied by 0.015 and by the maximum
    value of ``data``, sets the noise amplitude.  That amplitude then scales
    one ``np.random.normal`` sample per element along the first axis.
    """
    amplitude = 0.015 * np.random.uniform() * np.amax(data)
    gaussian = np.random.normal(size=data.shape[0])
    return data + amplitude * gaussian
def stretch(data, rate=0.8):
    """Time-stretch ``data`` with ``librosa.effects.time_stretch``.

    Args:
        data: Audio samples, handed to librosa unchanged.
        rate: Stretch factor forwarded as librosa's ``rate`` keyword
            (default 0.8).

    Returns:
        Whatever ``librosa.effects.time_stretch`` returns for these inputs.
    """
    stretched = librosa.effects.time_stretch(data, rate=rate)
    return stretched
def shift(data):
    """Circularly roll ``data`` by a random number of positions.

    The offset is a single uniform draw from [-5, 5) multiplied by 1000 and
    truncated to an int, so it stays within +/-5000.  ``np.roll`` wraps the
    elements that fall off one end back onto the other.
    """
    draw = np.random.uniform(low=-5, high=5)
    offset = int(draw * 1000)
    return np.roll(data, offset)
def pitch(data, sampling_rate, pitch_factor=0.7):
    """Pitch-shift ``data`` with ``librosa.effects.pitch_shift``.

    Args:
        data: Audio samples, handed to librosa unchanged.
        sampling_rate: Forwarded as librosa's ``sr`` keyword.
        pitch_factor: Forwarded as librosa's ``n_steps`` keyword
            (default 0.7).

    Returns:
        Whatever ``librosa.effects.pitch_shift`` returns for these inputs.
    """
    shifted = librosa.effects.pitch_shift(
        data,
        sr=sampling_rate,
        n_steps=pitch_factor,
    )
    return shifted
def extract_process(data):
    """Build one flat feature vector from an audio signal.

    Five librosa features are computed (zero-crossing rate, chroma from the
    STFT magnitude, MFCC, RMS energy and mel spectrogram).  Each is averaged
    over its second axis after transposition, i.e. one mean per feature row,
    and the means are concatenated in that order.

    The sample rate is fixed at 22050 inside this function.
    NOTE(review): with librosa's default feature sizes the result presumably
    has 1 + 12 + 20 + 1 + 128 = 162 values -- confirm against the installed
    librosa version.
    """
    sr = 22050

    zcr = librosa.feature.zero_crossing_rate(y=data)
    magnitude = np.abs(librosa.stft(data))
    chroma = librosa.feature.chroma_stft(S=magnitude, sr=sr)
    mfcc = librosa.feature.mfcc(y=data, sr=sr)
    rms = librosa.feature.rms(y=data)
    mel = librosa.feature.melspectrogram(y=data, sr=sr)

    per_feature_means = [
        np.mean(frames.T, axis=0)
        for frames in (zcr, chroma, mfcc, rms, mel)
    ]
    # Seed the stack with an empty default-dtype array so the output dtype is
    # promoted exactly as it would be when appending to np.array([]).
    return np.hstack([np.array([]), *per_feature_means])
def export_process(path):
    """Load an audio file and return features for three variants of it.

    ``librosa.load`` reads up to 5 seconds starting 1 second into the file.
    Feature vectors from ``extract_process`` are stacked row-wise for:

    1. the signal as loaded,
    2. the signal after ``noise``,
    3. the signal after ``stretch`` followed by ``pitch``.

    Returns:
        A 2-D array with one row per variant, in the order above.
    """
    signal, rate = librosa.load(path, duration=5, offset=1)

    noisy = noise(signal)
    stretched_and_pitched = pitch(stretch(signal), rate)

    variants = (signal, noisy, stretched_and_pitched)
    return np.vstack([extract_process(variant) for variant in variants])
# Restore the pickled training features and labels from files in the current
# working directory.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load artifacts from a trusted source.
with open('X_train.pkl', 'rb') as f:
    X_train = pickle.load(f)
with open('Y_train.pkl', 'rb') as f:
    Y_train = pickle.load(f)

# Put features and labels in one table, then split them back into arrays:
# every column except the last is a feature, the last column is the label.
Features = pd.DataFrame(X_train)
Features['labels'] = Y_train
X = Features.iloc[:, :-1].values
Y = Features['labels'].values

# One-hot encode the labels.  The fitted encoder is kept at module level so
# predict_emotion can map model output back to label values.
encoder_label = OneHotEncoder()
Y = encoder_label.fit_transform(np.array(Y).reshape(-1, 1)).toarray()

# Fixed-seed 90/10 split of the loaded data.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    Y,
    train_size=0.9,
    random_state=42,
    shuffle=True,
)

# Fit the scaler on the training portion only.  preprocess_audio reuses it to
# scale features extracted from new audio.
scaler_data = StandardScaler()
scaler_data.fit(x_train)
x_train = scaler_data.transform(x_train)
x_test = scaler_data.transform(x_test)
def preprocess_audio(audio):
    """Turn an audio file into scaled features with a trailing unit axis.

    Args:
        audio: Passed straight to ``export_process``, which loads it with
            ``librosa.load``.

    Returns:
        The ``export_process`` feature matrix, scaled with the module-level
        ``scaler_data`` and expanded with a new axis at position 2
        (presumably to match the model's expected input rank -- confirm).
    """
    scaled = scaler_data.transform(export_process(audio))
    return np.expand_dims(scaled, axis=2)
# Function to predict emotion from preprocessed audio
# Cache for the loaded Keras model, so the file is read and the network is
# built only once per process instead of on every prediction.
_EMOTION_MODEL = None


def _get_emotion_model():
    """Return the emotion model, loading it from disk on first use only."""
    global _EMOTION_MODEL
    if _EMOTION_MODEL is None:
        _EMOTION_MODEL = load_model('speech-emotion-recognition.hdf5')
    return _EMOTION_MODEL


def predict_emotion(preprocessed_audio):
    """Run the emotion model and decode its output to label values.

    Args:
        preprocessed_audio: Model input, as produced by ``preprocess_audio``.

    Returns:
        The first row of ``encoder_label.inverse_transform`` applied to the
        model's predictions, i.e. the decoded label for the first input row.
    """
    model = _get_emotion_model()
    prediction = model.predict(preprocessed_audio)
    predicted_emotion = encoder_label.inverse_transform(prediction)
    return predicted_emotion[0]
# Live emotion recognition
def live_emotion_recognition(audio_path):
    """Predict the emotion for the audio file at ``audio_path``.

    The file is run through ``preprocess_audio`` and then
    ``predict_emotion``.  The first element of the result that
    ``predict_emotion`` returns is handed back to the caller.
    """
    return predict_emotion(preprocess_audio(audio_path))[0]