import os
import gradio as gr
from models import infere_speech_emotion, infere_text_emotion, infere_voice2text
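# The `models` module is not shown here. From its usage below, each helper
# appears to take a single input and return a string:
#   infere_voice2text(filepath)     -> transcription of the recording
#   infere_speech_emotion(filepath) -> emotion label predicted from the audio
#   infere_text_emotion(text)       -> emotion label predicted from the transcript
# A minimal sketch of how they could be built with Hugging Face `transformers`
# pipelines (an illustrative assumption, not the Space's actual implementation;
# the default pipeline checkpoints stand in for whatever models the Space uses):
#
#   from transformers import pipeline
#   asr = pipeline("automatic-speech-recognition")  # speech -> text
#   speech_clf = pipeline("audio-classification")   # waveform -> emotion label
#   text_clf = pipeline("text-classification")      # text -> emotion label
#
#   def infere_voice2text(filepath):
#       return asr(filepath)["text"]
#
#   def infere_speech_emotion(filepath):
#       return speech_clf(filepath)[0]["label"]
#
#   def infere_text_emotion(text):
#       return text_clf(text)[0]["label"]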
# Create a Gradio app object
with gr.Blocks() as demo:
    gr.Markdown(
        '''
        # Speech and Text Emotion Recognition
        ## Determining someone's emotions based solely on their tone or their words can be challenging
        ### This app uses both to provide a more accurate analysis of the emotions expressed in a single audio recording
        '''
    )
    # Upload-audio tab: input and output fields
    with gr.Tab("Upload audio"):
        with gr.Row():
            upload_input = gr.Audio(label="Audio file", source="upload", type="filepath")
            with gr.Column():
                upload_output_1 = gr.Textbox(label="Text from the audio")
                upload_output_2 = gr.Textbox(label="Speech emotion")
                upload_output_3 = gr.Textbox(label="Text emotion")
        btn0 = gr.Button("Analyze audio")
        # Example audio files offered as input
        gr.Examples(
            [
                os.path.join(os.path.dirname(__file__), "audio/a_good_dream.wav"),
                os.path.join(os.path.dirname(__file__), "audio/hype_in_ai.wav"),
            ],
            upload_input,
            label="Examples in which speech and words express different emotions:",
        )
        # On click, transcribe the audio and classify the speech emotion in parallel
        btn0.click(fn=infere_voice2text, inputs=upload_input, outputs=upload_output_1)
        btn0.click(fn=infere_speech_emotion, inputs=upload_input, outputs=upload_output_2)
        # When the transcription lands, classify the emotion of the text itself
        upload_output_1.change(fn=infere_text_emotion, inputs=upload_output_1, outputs=upload_output_3)
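        # An equivalent single-handler wiring (a design alternative, not the
        # original): one callback could compute all three outputs at once, e.g.
        #   def analyze(filepath):
        #       text = infere_voice2text(filepath)
        #       return text, infere_speech_emotion(filepath), infere_text_emotion(text)
        #   btn0.click(fn=analyze, inputs=upload_input,
        #              outputs=[upload_output_1, upload_output_2, upload_output_3])
        # The chained .change() approach above lets the transcription appear as
        # soon as it is ready instead of waiting for all three results.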
    # Record-audio tab: input and output fields
    with gr.Tab("Record audio"):
        with gr.Row():
            record_input = gr.Audio(label="Audio recording", source="microphone", type="filepath")
            with gr.Column():
                record_output_1 = gr.Textbox(label="Text from the audio")
                record_output_2 = gr.Textbox(label="Speech emotion")
                record_output_3 = gr.Textbox(label="Text emotion")
        btn1 = gr.Button("Analyze audio")
        # Same wiring as the upload tab, driven by this tab's button
        btn1.click(fn=infere_voice2text, inputs=record_input, outputs=record_output_1)
        btn1.click(fn=infere_speech_emotion, inputs=record_input, outputs=record_output_2)
        record_output_1.change(fn=infere_text_emotion, inputs=record_output_1, outputs=record_output_3)
demo.launch()