|
from __future__ import absolute_import, division, print_function, unicode_literals |
|
|
|
import os |
|
import librosa |
|
import librosa.display |
|
import numpy as np |
|
import shutil |
|
import random |
|
import string |
|
import warnings |
|
import datetime |
|
import gradio as gr |
|
import tensorflow as tf |
|
|
|
from tqdm import tqdm |
|
from keras.models import Sequential |
|
from keras.layers import Dense |
|
from keras.utils import to_categorical |
|
from keras.layers import Flatten, Dropout, Activation |
|
from keras.layers import Conv2D, MaxPooling2D |
|
from keras.layers import BatchNormalization |
|
from sklearn.model_selection import train_test_split |
|
from tqdm import tqdm |
|
from save_data import flag |
|
|
|
# Suppress every Python warning for the lifetime of this process.
warnings.filterwarnings("ignore")

# Moment this module was loaded, plus two pre-formatted string views of it.
timestamp = datetime.datetime.now()
current_date = format(timestamp, '%d-%m-%Y')  # day-month-year
current_time = format(timestamp, '%I:%M:%S')  # 12-hour clock, no AM/PM marker

# Not referenced in the visible part of this file.
# NOTE(review): presumably consumed by save_data or other code - confirm.
IP = ''
cwd = os.getcwd()
|
|
|
# Emotion names; predict_speech_emotion() indexes this tuple with the argmax
# of the network output, so the order must match the trained output units.
classLabels = ('Angry', 'Fear', 'Disgust', 'Happy', 'Sad', 'Surprised', 'Neutral')
numLabels = len(classLabels)

# Height/width of the feature map fed to the first convolution; a trailing
# channel axis of size 1 is appended in the layer definition below.
in_shape = (39, 216)

# The layer sequence is kept exactly as before so that the stored weights
# still line up with the layers they were saved from.
model = Sequential([
    # Two wide 13x13 convolutions followed by pooling along the first axis.
    Conv2D(8, (13, 13), input_shape=(in_shape[0], in_shape[1], 1)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    Conv2D(8, (13, 13)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 1)),
    # A 3x3 and a 1x1 convolution followed by the same pooling.
    Conv2D(8, (3, 3)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    Conv2D(8, (1, 1)),
    BatchNormalization(axis=-1),
    Activation('relu'),
    MaxPooling2D(pool_size=(2, 1)),
    # Dense classification head.
    Flatten(),
    Dense(64),
    BatchNormalization(),
    Activation('relu'),
    Dropout(0.2),
    Dense(numLabels, activation='softmax'),
])

# The head is a single-label softmax over numLabels classes, so the matching
# loss is categorical cross-entropy. The previous 'binary_crossentropy' also
# made Keras resolve the 'accuracy' metric to binary accuracy. This only
# matters if the model is ever fit/evaluated; predict() does not use the loss.
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])

# Relative path: resolved against the current working directory.
model.load_weights('speech_emotion_detection_ravdess_savee.h5')
|
|
|
def selected_audio(audio):
    """Classify the bundled sample clip chosen in the dropdown.

    Args:
        audio: Dropdown value. It is lower-cased and suffixed with ``.wav``
            to form a file name inside the ``pre_recoreded`` directory.

    Returns:
        The label returned by ``predict_speech_emotion`` on success,
        ``None`` when nothing (or only the placeholder entry) is selected,
        and the string ``"ERROR"`` when the clip file does not exist or
        anything raises while handling it.
    """
    if not audio or audio == 'Please select any of the following options':
        return None

    # This function is a UI callback: any failure is reported to the user as
    # the text "ERROR" instead of propagating, as before.
    try:
        filepath = os.path.join("pre_recoreded", audio.lower() + '.wav')

        # Report a missing clip explicitly rather than falling through
        # without ever assigning a result.
        if not os.path.exists(filepath):
            print("Sample clip not found => ", filepath)
            return "ERROR"

        print("SELECT file name => ", filepath)
        result = predict_speech_emotion(filepath)
        print("result = ", result)
        return result
    except Exception as e:
        print(e)
        return "ERROR"
|
|
|
def recorded_audio(audio):
    """Classify a microphone recording and hand it to ``flag``.

    Args:
        audio: Value of the recording component; its ``name`` attribute is
            used as the path of the recorded file.

    Returns:
        Whatever ``predict_speech_emotion`` returns for the recording.

    Raises:
        gr.Error: If ``audio`` is falsy, i.e. nothing has been recorded.
    """
    if not audio:
        raise gr.Error("Please record audio first!!!!")

    # Five random alphanumeric characters form the name passed to flag().
    alphabet = string.ascii_letters + string.digits
    clip_name = ''.join(random.choice(alphabet) for _ in range(5)) + '.wav'

    recording_path = audio.name
    emotion = predict_speech_emotion(recording_path)

    # flag() comes from save_data and is not visible here.
    # NOTE(review): presumably it stores the clip with its label - confirm.
    flag(recording_path, clip_name, emotion)
    return emotion
|
|
|
|
|
def predict_speech_emotion(filepath):
    """Predict the emotion label for one audio file.

    Up to 2.5 s of audio is loaded, starting 0.5 s into the file and
    resampled to 44100 Hz. The signal is zero-padded or trimmed to exactly
    that window, turned into 39 MFCC rows, and fed to the module-level
    ``model``. The highest-scoring output index is mapped through
    ``classLabels``.

    Args:
        filepath: Path of the audio file to analyse.

    Returns:
        One of the ``classLabels`` entries, or ``None`` if ``filepath``
        does not exist.

    Raises:
        ValueError: If no samples remain after skipping the first 0.5 s.
    """
    if not os.path.exists(filepath):
        return None
    print("last file name => ", filepath)

    target_rate = 22050 * 2
    window_seconds = 2.5
    n_mfcc, n_frames = 39, 216  # feature-map size the network was built for

    X, sample_rate = librosa.load(filepath, res_type='kaiser_best',
                                  duration=window_seconds, sr=target_rate,
                                  offset=0.5)
    if X.size == 0:
        raise ValueError(
            "Audio clip is too short: nothing left after skipping the first 0.5 s")

    # Short recordings used to break the fixed reshape below. Force the
    # signal to the full analysis window (silence-padded at the end) so the
    # MFCC frame count is the same for every clip.
    # NOTE(review): the 216-frame figure relies on librosa's default hop
    # length for mfcc() - confirm if the librosa version changes.
    window_samples = int(window_seconds * target_rate)
    if X.shape[0] < window_samples:
        X = np.pad(X, (0, window_samples - X.shape[0]), mode='constant')
    else:
        X = X[:window_samples]

    mfccs = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=n_mfcc)
    feature = mfccs.reshape(n_mfcc, n_frames, 1)

    # Add the leading batch axis expected by model.predict().
    prediction = model.predict(np.array([feature]))
    return classLabels[np.argmax(prediction)]
|
|
|
|
|
def return_audio_clip(audio_text):
    """Map a dropdown entry to the path of its bundled sample clip.

    Args:
        audio_text: Dropdown value. It is lower-cased and suffixed with
            ``.wav`` to form a file name inside ``pre_recoreded``.

    Returns:
        The relative path of the clip, or ``None`` when ``audio_text`` is
        empty or no such file exists (for example when the placeholder
        entry is selected), so the caller never receives a path that
        cannot be opened.
    """
    if not audio_text:
        return None

    filepath = os.path.join("pre_recoreded", audio_text.lower() + '.wav')
    if not os.path.isfile(filepath):
        return None
    return filepath
|
|
|
# Two-column demo page: bundled sample clips on the left, microphone
# recording on the right. Nesting below is reconstructed from the statement
# order. NOTE(review): confirm the column layout against the deployed app.
with gr.Blocks(css=".gradio-container {background-color: lightgray;} #btn {background-color: orange;}") as blocks:
    gr.Markdown("<h1 style='text-align: center; margin-bottom: 1rem'>"
                + "Audio Emotion Detection"
                + "</h1>")
    with gr.Row():
        # Left column: pick a sample clip, preview it, classify it.
        with gr.Column():
            input_audio_text = gr.Dropdown(
                label="Input Audio",
                choices=["Please select any of the following options", "Angry", "Happy", "Sad", "Disgust", "Fear", "Surprise", "Neutral"],
                value='Please select any of the following options',
                interactive=True)
            audio_ui = gr.Audio()
            # Changing the selection loads the matching clip into the player.
            input_audio_text.change(return_audio_clip, input_audio_text, audio_ui)

            output_text = gr.Textbox(label="Detected Emotion!")
            sub_btn = gr.Button("Detect Emotion", elem_id="btn")

        # Right column: record from the microphone and classify.
        with gr.Column():
            # Label typo fixed ("Recored" -> "Recorded").
            audio = gr.Audio(label="Recorded audio", source="microphone", type="file")
            recorded_text = gr.Textbox(label="Detected Emotion!")
            with gr.Column():
                sub_btn2 = gr.Button("Detect Emotion", elem_id="btn")

    gr.Markdown("""<p style='text-align: center;'>Feel free to give us your <a href="https://www.pragnakalp.com/contact/" target="_blank">feedback</a> and contact us
at <a href="mailto:letstalk@pragnakalp.com" target="_blank">letstalk@pragnakalp.com</a> if you want to have your own Speech emotion detection system.
We are just one click away. And don't forget to check out more interesting
<a href="https://www.pragnakalp.com/services/natural-language-processing-services/" target="_blank">NLP services</a> we are offering.</p>
<p style='text-align: center;'>Developed by: <a href="https://www.pragnakalp.com" target="_blank">Pragnakalp Techlabs</a></p>""")

    # Wire each button to its classifier and result textbox.
    sub_btn.click(selected_audio, inputs=input_audio_text, outputs=output_text)
    sub_btn2.click(recorded_audio, inputs=audio, outputs=recorded_text)

blocks.launch()