from __future__ import absolute_import, division, print_function, unicode_literals

import os
import random
import string
import warnings
import datetime

import librosa
import numpy as np
import gradio as gr

from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Activation
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization

from save_data import flag

warnings.filterwarnings("ignore")

# Timestamp of process start (currently informational only).
timestamp = datetime.datetime.now()
current_date = timestamp.strftime('%d-%m-%Y')
current_time = timestamp.strftime('%I:%M:%S')

classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral')
numLabels = len(classLabels)
in_shape = (39, 216)  # 39 MFCC coefficients x 216 time frames

# Small CNN that treats the 39x216 MFCC matrix as a one-channel image and
# classifies it into one of the seven emotions.
model = Sequential()
model.add(Conv2D(8, (13, 13), input_shape=(in_shape[0], in_shape[1], 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(Conv2D(8, (13, 13)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Conv2D(8, (3, 3)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(Conv2D(8, (1, 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Flatten())
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(numLabels, activation='softmax'))

# The loss/optimizer only matter for training; at inference time the
# architecture just has to match the saved weights.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
model.load_weights('speech_emotion_detection_ravdess_savee.h5')


def selected_audio(audio):
    """Predict the emotion of one of the bundled pre-recorded clips."""
    try:
        if audio and audio != 'Please select any of the following options':
            post_file_name = audio.lower() + '.wav'
            filepath = os.path.join("pre_recoreded", post_file_name)
            if os.path.exists(filepath):
                print("SELECT file name => ", filepath)
                result = predict_speech_emotion(filepath)
                print("result = ", result)
                return result
    except Exception as e:
        print(e)
        return "ERROR"


def recorded_audio(audio):
    """Predict the emotion of a clip recorded from the microphone."""
    if not audio:
        raise gr.Error("Please record audio first!!!!")
    # Random file name under which save_data.flag() logs the recording.
    get_audio_name = ''.join(random.choice(string.ascii_letters + string.digits)
                             for _ in range(5)) + '.wav'
    audio_file_path = audio.name
    final_output = predict_speech_emotion(audio_file_path)
    flag(audio_file_path, get_audio_name, final_output)
    return final_output


def predict_speech_emotion(filepath):
    """Extract MFCC features from a wav file and run the CNN classifier."""
    if not os.path.exists(filepath):
        return "ERROR"
    print("last file name => ", filepath)
    # 2.5 s of audio at 44.1 kHz (skipping the first 0.5 s) yields exactly
    # the 216 MFCC frames the network expects.
    X, sample_rate = librosa.load(filepath, res_type='kaiser_best',
                                  duration=2.5, sr=22050 * 2, offset=0.5)
    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=39)
    feature = mfccs.reshape(39, 216, 1)
    prediction = model.predict(np.array([feature]))
    return classLabels[np.argmax(prediction)]
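
# Optional sketch (hypothetical helper, not wired into the app): the reshape
# in predict_speech_emotion assumes librosa returns exactly 216 MFCC frames,
# which only holds for recordings of at least 3 s (0.5 s offset + 2.5 s
# duration). To tolerate shorter clips, replace the reshape above with
# pad_or_trim(mfccs).reshape(39, 216, 1).
def pad_or_trim(mfccs, n_frames=216):
    """Zero-pad on the right, or truncate, so mfccs has exactly n_frames columns."""
    if mfccs.shape[1] < n_frames:
        mfccs = np.pad(mfccs, ((0, 0), (0, n_frames - mfccs.shape[1])))
    return mfccs[:, :n_frames]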

def return_audio_clip(audio_text):
    """Return the path of the pre-recorded clip matching the dropdown choice."""
    post_file_name = audio_text.lower() + '.wav'
    filepath = os.path.join("pre_recoreded", post_file_name)
    return filepath


with gr.Blocks(css=".gradio-container {background-color: lightgray;} "
                   "#btn {background-color: orange;}") as blocks:
    gr.Markdown("<center><h1>" + "Audio Emotion Detection" + "</h1></center>")
") with gr.Row(): with gr.Column(): input_audio_text = gr.Dropdown(lable="Input Audio",choices=["Please select any of the following options","Angry", "Happy", "Sad", "Disgust","Fear", "Surprise", "Neutral"],interactive=True) audio_ui=gr.Audio() input_audio_text.change(return_audio_clip,input_audio_text,audio_ui) output_text = gr.Textbox(lable="Prdicted emotion") sub_btn = gr.Button("Submit",elem_id="btn") gr.Button.style(sub_btn,bg_color='orange',text_color='white') with gr.Column(): audio=gr.Audio(labele="Recored audio",source="microphone", type="file") recorded_text = gr.Textbox(lable="Prdicted emotion") with gr.Column(): sub_btn2 = gr.Button("Submit") gr.Markdown("""

    gr.Markdown("""
Feel free to give us your feedback and contact us at letstalk@pragnakalp.com
if you want to have your own Speech emotion detection system. We are just one
click away. And don't forget to check out more interesting NLP services we
are offering.

Developed by: Pragnakalp Techlabs
""")

    sub_btn.click(selected_audio, inputs=input_audio_text, outputs=output_text)
    sub_btn2.click(recorded_audio, inputs=audio, outputs=recorded_text)

blocks.launch()