# emotion_api / app.py
# codetocare's picture
# Update app.py
# 16dbaa3 verified
import gradio as gr
import torch
import torchaudio
from transformers import Wav2Vec2ForSequenceClassification, Wav2Vec2Processor
# Hugging Face Hub repository that provides both the processor and the
# classifier weights; each is loaded once, at import time.
model_name = "Dpngtm/wav2vec2-emotion-recognition"
processor = Wav2Vec2Processor.from_pretrained(model_name)
model = Wav2Vec2ForSequenceClassification.from_pretrained(model_name)

# Class names indexed by predicted class id (order taken from the model card).
labels = [
    "angry",
    "calm",
    "disgust",
    "fearful",
    "happy",
    "neutral",
    "sad",
    "surprised",
]
# Emotion prediction function
def predict_emotion(audio):
    """Classify the emotion expressed in an audio clip supplied by Gradio.

    Args:
        audio: The value produced by ``gr.Audio(type="numpy")``: a
            ``(sample_rate, samples)`` tuple, or ``None`` when nothing was
            recorded. ``samples`` may be integer PCM or float; for
            multi-channel clips the channel axis is assumed to be the last
            one -- TODO confirm against the Gradio version in use.

    Returns:
        A Markdown string containing the predicted label, or a short notice
        when no usable audio was supplied.
    """
    no_audio_message = "No audio received. Please record a clip and try again."
    target_sr = 16000  # rate the processor is told the samples are at

    if audio is None:
        return no_audio_message

    # Gradio's numpy format puts the sample rate first, then the samples.
    sr, speech = audio
    waveform = torch.as_tensor(speech)
    if waveform.numel() == 0:
        return no_audio_message

    # Integer PCM -> float32 scaled to roughly [-1, 1]; the resampler and the
    # model both operate on floating-point samples.
    if waveform.is_floating_point():
        waveform = waveform.to(torch.float32)
    else:
        full_scale = torch.iinfo(waveform.dtype).max
        waveform = waveform.to(torch.float32) / full_scale

    # Mix multi-channel audio down to mono by averaging the channels.
    if waveform.dim() > 1:
        waveform = waveform.mean(dim=-1)

    if sr != target_sr:
        resampler = torchaudio.transforms.Resample(sr, target_sr)
        waveform = resampler(waveform)

    # The feature extractor documents NumPy arrays / lists as raw input.
    input_values = processor(
        waveform.numpy(), sampling_rate=target_sr, return_tensors="pt"
    ).input_values

    with torch.no_grad():
        logits = model(input_values).logits

    predicted_id = torch.argmax(logits, dim=-1).item()
    emotion = labels[predicted_id]
    return f"Predicted Emotion: **{emotion}**"
# Gradio interface
_audio_options = {"type": "numpy", "label": "Speak or Upload Audio"}
try:
    # Gradio 3.x spelling: a single `source` string.
    audio_input = gr.Audio(source="microphone", **_audio_options)
except TypeError:
    # Newer Gradio releases replaced `source` with a `sources` list and reject
    # the old keyword; enable upload too so the widget matches its label.
    audio_input = gr.Audio(sources=["microphone", "upload"], **_audio_options)

# Wire the audio widget to the classifier and show the result as Markdown.
interface = gr.Interface(
    fn=predict_emotion,
    inputs=audio_input,
    outputs=gr.Markdown(label="Detected Emotion"),
    title="Voice Emotion Recognition",
    description="This app detects the emotional tone of your speech using a fine-tuned Wav2Vec2 model.",
)

# Start the web server (runs at import time, as in the original script).
interface.launch()