Spaces:

Mohamed41
/

speech_emotion_recognition

Build error

App Files Files Community

speech_emotion_recognition / app.py

Mohamed41

Update app.py

8b108ed over 2 years ago

raw

history blame contribute delete

2.81 kB

	#IMPORT THE LIBRARIES
	import pandas as pd
	import numpy as np
	import joblib
	import os
	import sys

	# librosa is a Python library for audio and music analysis; it is used below to load audio files and extract features from them.
	import librosa
	import librosa.display
	import seaborn as sns
	import matplotlib.pyplot as plt

	from sklearn.preprocessing import StandardScaler, OneHotEncoder
	from sklearn.metrics import confusion_matrix, classification_report
	from sklearn.model_selection import train_test_split

	# to play the audio files


	import keras
	from keras.preprocessing import sequence
	from keras.models import Sequential,model_from_json
	from keras.layers import Dense, Embedding
	from keras.layers import LSTM,BatchNormalization , GRU
	from keras.preprocessing.text import Tokenizer

	from tensorflow.keras.utils import to_categorical
	from keras.layers import Input, Flatten, Dropout, Activation
	from keras.layers import Conv1D, MaxPooling1D, AveragePooling1D
	from keras.models import Model
	from keras.callbacks import ModelCheckpoint
	from tensorflow.keras.optimizers import SGD



	import warnings
	# Install the blanket "ignore" filter only when the user did not pass any
	# -W option / PYTHONWARNINGS, so explicit warning settings still win.
	if not sys.warnoptions:
	    warnings.simplefilter("ignore")
	# Deprecation warnings are always silenced, regardless of the check above.
	warnings.filterwarnings("ignore", category=DeprecationWarning)
	import tensorflow as tf
	from huggingface_hub import from_pretrained_keras
	import gradio as gr
	from huggingface_hub import from_pretrained_keras
	# Load the Keras model from the Hugging Face Hub once, at import time; it is
	# shared by every call to prediction().
	# NOTE(review): the repo id mentions "AR_TEXT" -- confirm this is the intended
	# speech-emotion model and that it accepts (1, 41, 1) inputs.
	model=from_pretrained_keras( 'Mohamed41/MODEL_EMOTION_AR_TEXT_72P')

	def feat_ext(data, sr=22050, n_mfcc=40):
	    """Build a feature vector (mean ZCR followed by mean MFCCs) from audio samples.

	    Args:
	        data: Audio time series, passed as ``y`` to the librosa extractors.
	        sr: Sampling rate of ``data`` in Hz, forwarded to the MFCC
	            computation. Defaults to 22050, the previously hard-coded value.
	        n_mfcc: Number of MFCC coefficients to compute. Defaults to 40, the
	            previously hard-coded value.

	    Returns:
	        ``numpy.ndarray`` with the frame-averaged zero-crossing rate first,
	        then the frame-averaged MFCCs. With the defaults and 1-D (mono)
	        input the caller reshapes this to ``(1, 41)``.
	    """
	    # Time-domain feature: zero-crossing rate (a prosodic / low-level
	    # acoustic feature), averaged over all frames.
	    zcr = np.mean(librosa.feature.zero_crossing_rate(y=data).T, axis=0)

	    # Frequency-domain (spectral) feature: MFCCs, averaged over all frames.
	    mfcc = np.mean(
	        librosa.feature.mfcc(y=data, sr=sr, n_mfcc=n_mfcc).T,
	        axis=0,
	    )

	    # Seeding the stack with an empty float array keeps the same dtype
	    # promotion as the original step-by-step horizontal stacking.
	    return np.hstack((np.array([]), zcr, mfcc))


	# Preprocessing artifacts read from the working directory at import time
	# (scaler first, then encoder). get_predict_feat() calls scaler.transform and
	# prediction() calls encoder.inverse_transform.
	# NOTE(review): presumably the fitted StandardScaler / OneHotEncoder from
	# training -- confirm. joblib files are pickle-based; load trusted files only.
	scaler, encoder = (
	    joblib.load(artifact) for artifact in ('scaler.joblib', 'encoder.joblib')
	)

	def get_predict_feat(path):
	    """Load an audio clip and convert it into a scaled model-input array.

	    Args:
	        path: Location of the audio file, forwarded to ``librosa.load``.

	    Returns:
	        The 41 features from ``feat_ext`` reshaped to ``(1, 41)``, passed
	        through the module-level ``scaler`` and given a trailing axis
	        (``(1, 41, 1)``, assuming ``scaler.transform`` preserves the shape).

	    Raises:
	        ValueError: If no samples remain after skipping the first 0.6 s,
	            i.e. the clip is too short to extract features from.
	    """
	    # Read at most 2.5 s starting 0.6 s into the file. The sample rate that
	    # librosa also returns is not needed here.
	    samples, _ = librosa.load(path, duration=2.5, offset=0.6)
	    if samples.size == 0:
	        raise ValueError(
	            "Audio clip is too short: no samples left after the 0.6 s offset."
	        )

	    features = np.asarray(feat_ext(samples))
	    # Shape is passed positionally: the ``newshape=`` keyword is deprecated
	    # in NumPy 2.1+, while the positional form works on every version.
	    features = np.reshape(features, (1, 41))
	    scaled = scaler.transform(features)
	    return np.expand_dims(scaled, axis=2)

	# Emotion names keyed by integer codes 1..8. Nothing in the visible code
	# reads this mapping.
	emotions1 = dict(enumerate(
	    ['Neutral', 'Calm', 'Happy', 'Sad', 'Angry', 'Fear', 'Disgust', 'Surprise'],
	    start=1,
	))
	def prediction(path1):
	    """Return the decoded label the model assigns to the audio file at ``path1``.

	    The file is converted to model input by ``get_predict_feat``, scored with
	    the module-level ``model``, and the scores are mapped back to a label by
	    the module-level ``encoder``.
	    """
	    model_input = get_predict_feat(path1)
	    scores = model.predict(model_input)
	    decoded = encoder.inverse_transform(scores)
	    # Only one clip is scored, so take the first row and its single column.
	    first_row = decoded[0]
	    return first_row[0]
	def mainfunc(data):
	    """Log the value received from the UI and return its string form.

	    NOTE(review): this handler only echoes its input (the audio file path
	    when wired to a ``type="filepath"`` audio component); it never calls
	    ``prediction``. Confirm whether that is intentional.
	    """
	    print(data)
	    as_text = str(data)
	    return as_text
	# The ``gr.inputs`` namespace was deprecated in Gradio 3 and removed in
	# Gradio 4. Prefer the top-level component and fall back to the legacy one
	# only on old Gradio versions that lack it.
	_audio_component = gr.Audio if hasattr(gr, "Audio") else gr.inputs.Audio
	# type="filepath" makes Gradio hand the handler a path to the audio file.
	audio_input = _audio_component(type="filepath")
	iface = gr.Interface(fn=mainfunc, inputs=audio_input, outputs="text")
	# Start the web server at import/run time.
	iface.launch(inline=False)