from __future__ import absolute_import, division, print_function, unicode_literals import gradio as gr import os import librosa import librosa.display import numpy as np import shutil import random import string import warnings import datetime import tensorflow as tf from tqdm import tqdm from keras.models import Sequential from keras.layers import Dense from keras.utils import to_categorical from keras.layers import Flatten, Dropout, Activation from keras.layers import Conv2D, MaxPooling2D from keras.layers import BatchNormalization from sklearn.model_selection import train_test_split from tqdm import tqdm from save_data import flag warnings.filterwarnings("ignore") timestamp = datetime.datetime.now() current_date = timestamp.strftime('%d-%m-%Y') current_time = timestamp.strftime('%I:%M:%S') IP = '' cwd = os.getcwd() classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral') numLabels = len(classLabels) in_shape = (39,216) model = Sequential() model.add(Conv2D(8, (13, 13), input_shape=(in_shape[0], in_shape[1], 1))) model.add(BatchNormalization(axis=-1)) model.add(Activation('relu')) model.add(Conv2D(8, (13, 13))) model.add(BatchNormalization(axis=-1)) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 1))) model.add(Conv2D(8, (3, 3))) model.add(BatchNormalization(axis=-1)) model.add(Activation('relu')) model.add(Conv2D(8, (1, 1))) model.add(BatchNormalization(axis=-1)) model.add(Activation('relu')) model.add(MaxPooling2D(pool_size=(2, 1))) model.add(Flatten()) model.add(Dense(64)) model.add(BatchNormalization()) model.add(Activation('relu')) model.add(Dropout(0.2)) model.add(Dense(numLabels, activation='softmax')) model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy']) model.load_weights('speech_emotion_detection_ravdess_savee.h5') def selected_audio(audio): try: if audio and audio != 'Please select any of the following options': post_file_name = audio.lower() + '.wav' filepath = os.path.join("pre_recoreded",post_file_name) if os.path.exists(filepath): print("SELECT file name => ",filepath) result = predict_speech_emotion(filepath) print("result = ",result) return result except Exception as e: print(e) return "ERROR" def recorded_audio(audio): get_audio_name = '' final_output = '' if audio: get_audio_name = ''.join([random.choice(string.ascii_letters + string.digits) for n in range(5)]) get_audio_name = get_audio_name + '.wav' audio_file_path = audio.name final_output = predict_speech_emotion(audio_file_path) flag(audio_file_path,get_audio_name,final_output) return final_output else: raise gr.Error("Please record audio first!!!!") def predict_speech_emotion(filepath): if os.path.exists(filepath): print("last file name => ",filepath) X, sample_rate = librosa.load(filepath, res_type='kaiser_best',duration=2.5,sr=22050*2,offset=0.5) sample_rate = np.array(sample_rate) mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=39) feature = mfccs feature = feature.reshape(39, 216, 1) # np_array = np.array([feature]) np_array = np.array([feature]) prediction = model.predict(np_array) np_argmax = np.argmax(prediction) result = classLabels[np_argmax] return result def return_audio_clip(audio_text): post_file_name = audio_text.lower() + '.wav' filepath = os.path.join("pre_recoreded",post_file_name) return filepath with gr.Blocks(css=".gradio-container {background-color: lightgray;} #btn {background-color: orange;}") as blocks: gr.Markdown("

" + "Audio Emotion Detection" + "

") with gr.Row(): with gr.Column(): input_audio_text = gr.Dropdown(label="Input Audio",choices=["Please select any of the following options","Angry", "Happy", "Sad", "Disgust","Fear", "Surprise", "Neutral"],value='Please select any of the following options',interactive=True) audio_ui=gr.Audio() input_audio_text.change(return_audio_clip,input_audio_text,audio_ui) output_text = gr.Textbox(label="Detected Emotion!") sub_btn = gr.Button("Detect Emotion",elem_id="btn") with gr.Column(): audio=gr.Audio(label="Recored audio",source="microphone", type="file") recorded_text = gr.Textbox(label="Detected Emotion!") with gr.Column(): sub_btn2 = gr.Button("Detect Emotion",elem_id="btn") gr.Markdown("""

Feel free to give us your feedback and contact us at letstalk@pragnakalp.com if you want to have your own Speech emotion detection system. We are just one click away. And don't forget to check out more interesting NLP services we are offering.

Developed by: Pragnakalp Techlabs

""") sub_btn.click(selected_audio, inputs=input_audio_text, outputs=output_text) sub_btn2.click(recorded_audio, inputs=audio, outputs=recorded_text) blocks.launch()