# demo_apps/app.py
import gradio as gr
import librosa
import matplotlib.pyplot as plt
from radar_chart import radar_factory
from keras.models import load_model
import os
import numpy as np
model = load_model(os.path.join("model", "Emotion_Voice_Detection_Model_tuned_2.h5"))
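# The model is assumed (inferred from the preprocessing in make_predictions
# below) to take a single 40-dimensional MFCC feature vector shaped
# (batch, 40, 1) and to output one probability per word class.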
def convert_class_to_emotion(pred):
    """
    Convert an integer class prediction into a human-readable label.

    The underlying network was originally an eight-class emotion detector
    (neutral, calm, happy, sad, angry, fearful, disgust, surprised) and has
    been retuned to predict four Indonesian word classes.
    """
    label_conversion = {0: 'kata_sifat',       # adjective
                        1: 'kata_benda',       # noun
                        2: 'kata_kerja',       # verb
                        3: 'kata_keterangan'}  # adverb
    return label_conversion[int(pred)]
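# For example, a predicted class index of 1 maps to the noun label:
#   convert_class_to_emotion(1)  # -> 'kata_benda'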
def make_predictions(file, micro=None):
    """
    Load the selected audio source, extract MFCC features, run the model and
    build the two output figures.
    """
    if file is not None and micro is None:
        input_audio = file
    elif file is None and micro is not None:
        input_audio = micro
    else:
        print("Both or neither input was provided; falling back to the file input")
        input_audio = file
    data, sampling_rate = librosa.load(input_audio)
    print(f"The sampling rate is {sampling_rate}")
    # Average the 40 MFCC coefficients over time to obtain one fixed-size
    # feature vector per clip, then reshape it to (batch, n_mfcc, 1).
    mfccs = np.mean(librosa.feature.mfcc(y=data, sr=sampling_rate, n_mfcc=40).T, axis=0)
    x = np.expand_dims(mfccs, axis=1)
    x = np.expand_dims(x, axis=0)
    # Run the model once and reuse the probabilities for both the predicted
    # label and the radar chart.
    probabilities = model.predict(x)[0]
    prediction = np.argmax(probabilities)
    # Radar chart of the class probabilities, one spoke per class. The spoke
    # labels follow the class order defined in convert_class_to_emotion.
    N = 4
    theta = radar_factory(N, frame='polygon')
    spoke_labels = np.array(['kata_sifat',
                             'kata_benda',
                             'kata_kerja',
                             'kata_keterangan'])
    fig_radar, axs = plt.subplots(figsize=(8, 8), nrows=1, ncols=1,
                                  subplot_kw=dict(projection='radar'))
    axs.plot(theta, probabilities, color="b")
    axs.fill(theta, probabilities, alpha=0.3)
    axs.set_varlabels(spoke_labels)
    # Waveform plot of the raw audio signal.
    fig = plt.figure()
    plt.plot(data, alpha=0.8)
    plt.xlabel("time (samples)")
    plt.ylabel("amplitude")
    return convert_class_to_emotion(prediction), fig, fig_radar
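# A quick local smoke test, assuming a WAV file exists at the hypothetical
# path "examples/sample.wav":
#   label, fig, fig_radar = make_predictions("examples/sample.wav")
#   print(label)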
# Build the Gradio interface: two optional audio inputs (file upload or
# microphone), plus a text label and two matplotlib figures as outputs.
iface = gr.Interface(
    fn=make_predictions,
    title="Identify the emotion of a chunk of audio speech",
    description="A simple interface to perform emotion recognition from an audio file.",
    article="Author: <a href=\"https://huggingface.co/poisso\">Poisso</a>.",
    inputs=[gr.Audio(source="upload", type="filepath", label="File"),
            gr.Audio(source="microphone", type="filepath", streaming=False, label="Microphone")],
    # Each example fills only the file input; the microphone input stays empty.
    examples=[[os.path.join("examples", filename), None] for filename in os.listdir("examples")],
    outputs=[gr.Textbox(label="Text output"), gr.Plot(), gr.Plot()]
)
iface.launch(debug=True)
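# Running this script starts a local Gradio server (http://127.0.0.1:7860 by
# default); debug=True keeps the process attached and prints errors to the
# console.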