from __future__ import absolute_import, division, print_function, unicode_literals

import os
import shutil
import warnings

import gradio as gr
import librosa
import numpy as np
from keras.models import Sequential
from keras.layers import (Activation, BatchNormalization, Conv2D, Dense,
                          Dropout, Flatten, MaxPooling2D)

warnings.filterwarnings("ignore")

classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral')
numLabels = len(classLabels)
in_shape = (39, 216)  # 39 MFCC coefficients x 216 time frames

# Small CNN over the 39x216 MFCC "image": two conv stages, each ending in a
# pool over the time axis, then a dense head with softmax over the 7 emotions.
model = Sequential()
model.add(Conv2D(8, (13, 13), input_shape=(in_shape[0], in_shape[1], 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(Conv2D(8, (13, 13)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Conv2D(8, (3, 3)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(Conv2D(8, (1, 1)))
model.add(BatchNormalization(axis=-1))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 1)))
model.add(Flatten())
model.add(Dense(64))
model.add(BatchNormalization())
model.add(Activation('relu'))
model.add(Dropout(0.2))
model.add(Dense(numLabels, activation='softmax'))
# categorical_crossentropy matches the multi-class softmax head (the original
# used binary_crossentropy, which is wrong for 7 mutually exclusive classes;
# the loss only matters for training -- inference with loaded weights is unaffected).
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.load_weights('speech_emotion_detection_ravdess_savee.h5')


def selected_audio(audio):
    """Classify one of the bundled pre-recorded clips chosen from the dropdown."""
    if audio and audio != 'Please select any of the following options':
        post_file_name = audio.lower() + '.wav'
        filepath = os.path.join("pre_recoreded", post_file_name)
        if os.path.exists(filepath):
            print("SELECT file name => ", filepath)
            result = predict_speech_emotion(filepath)
            print("result = ", result)
            return result


def recorded_audio(audio):
    """Archive a microphone recording under the next free number, then classify it.

    The original numbering logic computed a new file name but never used it
    (a hard-coded 'recorded_audio/22.wav' was left behind); this version
    actually copies the recording to the computed path before predicting.
    """
    try:
        os.makedirs('recorded_audio', exist_ok=True)
        existing = [int(f.split('.')[0]) for f in os.listdir('recorded_audio')
                    if f.split('.')[0].isdigit()]
        next_id = max(existing) + 1 if existing else 1
        filepath = os.path.join('recorded_audio', str(next_id) + '.wav')
        shutil.copy(audio, filepath)
        return predict_speech_emotion(filepath)
    except Exception as e:
        print(e)
        return "ERROR"


def predict_speech_emotion(filepath):
    """Extract 39 MFCCs from a wav file and run the CNN classifier on them."""
    if os.path.exists(filepath):
        print("last file name => ", filepath)
        # Load 2.5 s of audio at 44.1 kHz, skipping the first 0.5 s.
        X, sample_rate = librosa.load(filepath, res_type='kaiser_best',
                                      duration=2.5, sr=22050 * 2, offset=0.5)
        mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=39)
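        # Added guard (not in the original script): the model expects exactly
        # 216 MFCC frames (2.5 s at 44.1 kHz with librosa's default hop of 512),
        # so a clip shorter than 2.5 s would make the reshape below fail.
        # This pads the time axis with zeros and truncates any excess, assuming
        # zero-padding is an acceptable stand-in for missing audio.
        if mfccs.shape[1] < in_shape[1]:
            mfccs = np.pad(mfccs, ((0, 0), (0, in_shape[1] - mfccs.shape[1])),
                           mode='constant')
        mfccs = mfccs[:, :in_shape[1]]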
        feature = mfccs.reshape(39, 216, 1)
        np_array = np.array([feature])
        prediction = model.predict(np_array)
        result = classLabels[np.argmax(prediction)]
        return result


def return_audio_clip(audio_text):
    """Resolve a dropdown choice to its pre-recorded wav file for playback."""
    post_file_name = audio_text.lower() + '.wav'
    filepath = os.path.join("pre_recoreded", post_file_name)
    return filepath


with gr.Blocks() as demo:
    gr.Markdown("Select audio or record audio")
    with gr.Row():
        with gr.Column():
            input_audio_text = gr.Dropdown(
                ["Please select any of the following options", "Angry", "Happy",
                 "Sad", "Disgust", "Fear", "Surprise", "Neutral"],
                label="Input Audio", interactive=True)
            audio_ui = gr.Audio()
            input_audio_text.change(return_audio_clip, input_audio_text, audio_ui)
            output_text = gr.Textbox(label="Predicted emotion")
            sub_btn = gr.Button("Submit")
        with gr.Column():
            audio = gr.Audio(source="microphone", type="filepath",
                             label="Recorded audio")
            recorded_text = gr.Textbox(label="Predicted emotion")
        with gr.Column():
            sub_btn2 = gr.Button("Submit")

    sub_btn.click(selected_audio, inputs=input_audio_text, outputs=output_text)
    sub_btn2.click(recorded_audio, inputs=audio, outputs=recorded_text)

demo.launch()
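# Quick smoke test without the Gradio UI -- run this instead of demo.launch()
# for a one-off prediction (the clip path below is hypothetical):
#
#     print(predict_speech_emotion('pre_recoreded/happy.wav'))
#     # -> one of classLabels, e.g. 'Happy'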