"""Speech-emotion inference helpers.

Importing this module loads the pickled training features/labels, fits the
label encoder and the feature scaler on them, and exposes
``live_emotion_recognition`` to classify a single audio file with the saved
Keras model.
"""

import pickle

import librosa
import numpy as np
import pandas as pd
import sklearn
import tensorflow as tf
from keras.models import load_model
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler

sample_rate = 22050

# Path of the trained classifier and a lazily-filled cache for it.
_MODEL_PATH = 'speech-emotion-recognition.hdf5'
_model = None


def noise(data):
    """Return ``data`` with random Gaussian noise added.

    The noise amplitude is a random fraction (up to 1.5%) of the signal's
    maximum value.
    """
    noise_value = 0.015 * np.random.uniform() * np.amax(data)
    return data + noise_value * np.random.normal(size=data.shape[0])


def stretch(data, rate=0.8):
    """Return ``data`` time-stretched by ``rate`` via librosa."""
    return librosa.effects.time_stretch(data, rate=rate)


def shift(data):
    """Return ``data`` circularly rolled by a random offset.

    The offset is drawn uniformly from (-5000, 5000) samples.
    """
    shift_range = int(np.random.uniform(low=-5, high=5) * 1000)
    return np.roll(data, shift_range)


def pitch(data, sampling_rate, pitch_factor=0.7):
    """Return ``data`` pitch-shifted by ``pitch_factor`` steps via librosa."""
    return librosa.effects.pitch_shift(
        data, sr=sampling_rate, n_steps=pitch_factor
    )


def extract_process(data, sample_rate=22050):
    """Build one flat feature vector for an audio signal.

    The vector is the concatenation, in this order, of the time-averaged
    zero-crossing rate, chroma STFT, MFCC, RMS energy and mel spectrogram.

    Args:
        data: 1-D audio signal.
        sample_rate: Sampling rate of ``data``; defaults to 22050, the value
            this function previously hard-coded.

    Returns:
        1-D float64 array of features.
    """
    stft_out = np.abs(librosa.stft(data))
    feature_parts = [
        np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0),
        np.mean(
            librosa.feature.chroma_stft(S=stft_out, sr=sample_rate).T, axis=0
        ),
        np.mean(librosa.feature.mfcc(y=data, sr=sample_rate).T, axis=0),
        np.mean(librosa.feature.rms(y=data).T, axis=0),
        np.mean(
            librosa.feature.melspectrogram(y=data, sr=sample_rate).T, axis=0
        ),
    ]
    # The original accumulated onto an empty float64 array, so the result was
    # always float64; keep that dtype explicitly.
    return np.hstack(feature_parts).astype(np.float64)


def export_process(path):
    """Load an audio file and extract features for it and two augmentations.

    Reads up to 5 seconds of audio starting at the 1-second mark, then
    extracts features from (1) the raw signal, (2) the signal with added
    noise and (3) the signal stretched and pitch-shifted.

    Args:
        path: Path of the audio file to load.

    Returns:
        2-D array with one feature row per variant (three rows).
    """
    data, sr = librosa.load(path, duration=5, offset=1)

    # Use the rate reported by the loader so features and audio always agree.
    original_features = extract_process(data, sr)
    noisy_features = extract_process(noise(data), sr)
    stretched_pitched = pitch(stretch(data), sr)
    augmented_features = extract_process(stretched_pitched, sr)

    return np.vstack((original_features, noisy_features, augmented_features))


# NOTE: pickle.load can execute arbitrary code; only load trusted files here.
# Load the training features.
with open('X_train.pkl', 'rb') as f:
    X_train = pickle.load(f)

# Load the training labels.
with open('Y_train.pkl', 'rb') as f:
    Y_train = pickle.load(f)

Features = pd.DataFrame(X_train)
Features['labels'] = Y_train

X = Features.iloc[:, :-1].values
Y = Features['labels'].values

# Fit the label encoder on the training labels so predictions can be mapped
# back to label values later.
encoder_label = OneHotEncoder()
Y = encoder_label.fit_transform(np.array(Y).reshape(-1, 1)).toarray()

x_train, x_test, y_train, y_test = train_test_split(
    X, Y, train_size=0.9, random_state=42, shuffle=True
)

# Fit the scaler on the training split only; it is reused at inference time.
scaler_data = StandardScaler()
x_train = scaler_data.fit_transform(x_train)
x_test = scaler_data.transform(x_test)


def preprocess_audio(audio):
    """Turn an audio file into scaled model input.

    Args:
        audio: Path of the audio file.

    Returns:
        Scaled features from ``export_process`` with a trailing axis of
        length 1 appended.
    """
    features = export_process(audio)
    features = scaler_data.transform(features)
    return np.expand_dims(features, axis=2)


def _get_model():
    """Return the trained model, loading it from disk on first use only."""
    global _model
    if _model is None:
        _model = load_model(_MODEL_PATH)
    return _model


def predict_emotion(preprocessed_audio):
    """Predict the emotion label for preprocessed audio.

    Args:
        preprocessed_audio: Model input as produced by ``preprocess_audio``.

    Returns:
        The decoded label row for the first input sample (the remaining
        rows of the prediction are not used).
    """
    prediction = _get_model().predict(preprocessed_audio)
    predicted_emotion = encoder_label.inverse_transform(prediction)
    return predicted_emotion[0]


def live_emotion_recognition(audio_path):
    """Predict the emotion expressed in an audio file.

    Args:
        audio_path: Path of the audio file to classify.

    Returns:
        The first element of the label row returned by ``predict_emotion``.
    """
    preprocessed_audio = preprocess_audio(audio_path)
    label_row = predict_emotion(preprocessed_audio)
    return label_row[0]