# Captured from a hosted "Spaces" page whose status banner read "Runtime error".
import gradio as gr
import whisper
from transformers import pipeline
# Both models are created once at import time and shared by every request.
model = whisper.load_model("base")
sentiment_analysis = pipeline(
    "sentiment-analysis",
    framework="pt",
    model="SamLowe/roberta-base-go_emotions",
)
def analyze_sentiment(text):
    """Classify *text* with the module-level ``sentiment_analysis`` pipeline.

    Args:
        text: The text to classify.

    Returns:
        A dict mapping each returned label to its score. Every item the
        pipeline yields must expose ``'label'`` and ``'score'`` keys.
    """
    # NOTE(review): with default pipeline settings this is presumably only
    # the single top-scoring label -- confirm if all emotions are expected.
    predictions = sentiment_analysis(text)
    return {item["label"]: item["score"] for item in predictions}
# Emoji shown next to each emotion label.
# NOTE(review): these glyphs were restored from mis-encoded text. Entries
# whose trailing bytes were lost in the garbling are best-effort choices
# matching the label -- confirm against the intended design.
EMOJI_BY_SENTIMENT = {
    "disappointment": "😞",
    "sadness": "😢",
    "annoyance": "😠",
    "neutral": "😐",
    "disapproval": "👎",
    "realization": "😮",
    "nervousness": "😬",
    "approval": "👍",
    "joy": "😄",
    "anger": "😡",
    "embarrassment": "😳",
    "caring": "🤗",
    "remorse": "😔",
    "disgust": "🤢",
    "grief": "😥",
    "confusion": "😕",
    "relief": "😌",
    "desire": "😍",
    "admiration": "😌",
    "optimism": "😊",
    "fear": "😨",
    "love": "❤️",
    "excitement": "🎉",
    "curiosity": "🤔",
    "amusement": "😄",
    "surprise": "😲",
    "gratitude": "🙏",
    "pride": "🦁",
}


def get_sentiment_emoji(sentiment):
    """Return the emoji for *sentiment*, or ``""`` when the label is unknown.

    Args:
        sentiment: An emotion label such as ``"joy"`` or ``"anger"``.

    Returns:
        The matching emoji string, or an empty string if there is no entry.
    """
    return EMOJI_BY_SENTIMENT.get(sentiment, "")
def display_sentiment_results(sentiment_results, option):
    """Format sentiment scores as one text line per emotion.

    Args:
        sentiment_results: Mapping of emotion label to score.
        option: ``"Sentiment Only"`` for ``"<label> <emoji>"`` lines, or
            ``"Sentiment + Score"`` for ``"<label> <emoji>: <score>"`` lines.

    Returns:
        The newline-terminated lines joined into one string. Any other
        *option* value (including ``None``) yields an empty string.
    """
    include_score = option == "Sentiment + Score"
    if not include_score and option != "Sentiment Only":
        # Unrecognised display mode: nothing is rendered.
        return ""

    lines = []
    for sentiment, score in sentiment_results.items():
        entry = f"{sentiment} {get_sentiment_emoji(sentiment)}"
        if include_score:
            entry += f": {score}"
        lines.append(entry + "\n")
    return "".join(lines)
def inference(audio, sentiment_option):
    """Transcribe a recording and analyse the sentiment of the transcript.

    Args:
        audio: Path of the audio file to process.
        sentiment_option: Display mode forwarded to
            ``display_sentiment_results``.

    Returns:
        A tuple ``(language, transcription, sentiment_text)`` where
        ``language`` is the upper-cased key with the highest detection
        probability.

    Raises:
        gr.Error: If no audio was supplied.
    """
    if not audio:
        # Fail with a readable message instead of erroring inside whisper.
        raise gr.Error("No audio received. Please record something first.")

    waveform = whisper.load_audio(audio)
    waveform = whisper.pad_or_trim(waveform)
    mel = whisper.log_mel_spectrogram(waveform).to(model.device)

    # Pick the language whose detection probability is highest.
    _, probs = model.detect_language(mel)
    lang = max(probs, key=probs.get)

    options = whisper.DecodingOptions(fp16=False)
    result = whisper.decode(model, mel, options)

    sentiment_results = analyze_sentiment(result.text)
    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)
    return lang.upper(), result.text, sentiment_output
title = """<h1 align="center">โ Lim Kopi ๐ฌ</h1>""" | |
image_path = "coffee_logo.jpg" | |
description = """ | |
๐ป This MVP shows how we can use Whisper to conduct audio sentiment analysis on voice recordings of customer service agents. Whisper is a general speech recognition model built by OpenAI. It is trained on a large dataset of diverse audio and supports multilingual speech recognition, speech translation, and language identification tasks.<br><br> | |
โ๏ธ MVP Components:<br> | |
<br> | |
- Real-time multilingual speech recognition<br> | |
- Language identification<br> | |
- Sentiment analysis of the transcriptions<br> | |
<br> | |
๐ฏ The sentiment analysis results are provided as a dictionary with different emotions and their corresponding scores, so customer service agents can receive feedback on the overall call quality and customer receptiveness.<br> | |
<br> | |
๐ The sentiment analysis results are displayed with emojis representing the corresponding sentiment.<br> | |
<br> | |
โ The higher the score for a specific emotion, the stronger the presence of that emotion in the transcribed text.<br> | |
<br> | |
โ Use the microphone for real-time speech recognition.<br> | |
<br> | |
โก๏ธ The model will transcribe the audio for record-keeping, and perform sentiment analysis on the transcribed text.<br> | |
""" | |
custom_css = """ | |
#banner-image { | |
display: block; | |
margin-left: auto; | |
margin-right: auto; | |
} | |
#chat-message { | |
font-size: 14px; | |
min-height: 300px; | |
} | |
.svelte-1mwvhlq { | |
display: none !important; | |
} | |
""" | |
# Page layout: banner and description on top, then the recording controls
# and the three result fields.
# NOTE(review): the nesting below was reconstructed from statement order
# because the original indentation was lost -- confirm the intended layout.
# NOTE(review): gr.Box and Audio(source=...) are Gradio 3.x-era API;
# check them against the pinned Gradio version.
block = gr.Blocks(title="Lim Kopi Call Center Service", css=custom_css)

with block:
    gr.HTML(title)

    with gr.Row():
        with gr.Column():
            gr.Image(image_path, elem_id="banner-image", show_label=False)
        with gr.Column():
            gr.HTML(description)

    with gr.Group():
        with gr.Box():
            sentiment_option = gr.Radio(
                choices=["Sentiment Only", "Sentiment + Score"],
                label="Select an option",
                # Preselect a mode: with no selection the callback receives
                # None and the sentiment field would stay empty.
                value="Sentiment Only",
            )
            audio = gr.Audio(
                source="microphone",
                type="filepath",
            )
            with gr.Box():
                btn = gr.Button("Transcribe")
            lang_str = gr.Textbox(label="Language")
            text = gr.Textbox(label="Transcription")
            sentiment_output = gr.Textbox(label="Sentiment Analysis Results")

        # The click handler is registered inside the Blocks context.
        btn.click(
            inference,
            inputs=[audio, sentiment_option],
            outputs=[lang_str, text, sentiment_output],
        )

block.launch()