|
|
|
"""app.ipynb |
|
|
|
Automatically generated by Colaboratory. |
|
|
|
Original file is located at |
|
https://colab.research.google.com/drive/1i-Mo3pDk4cm6BpSRL37pXCvCqWNBlO7X |
|
""" |
|
|
|
# --- Imports and environment setup ---
from __future__ import annotations

import gradio as gr

import whisper

from transformers import pipeline

from gradio.themes.base import Base

from gradio.themes.utils import colors, fonts, sizes

from typing import Iterable

import os

# Silence the HuggingFace tokenizers fork/parallelism warning emitted when
# gradio spawns worker processes after the pipeline is created.
os.environ["TOKENIZERS_PARALLELISM"] = "false"

import matplotlib

# NOTE(review): TkAgg needs a local display, and neither matplotlib nor plt
# is used anywhere below — this looks like notebook leftovers. Confirm
# before removing (it would fail on a headless server).
matplotlib.use('TkAgg')

import matplotlib.pyplot as plt
|
|
|
|
|
# Whisper ASR model, "base" size; downloads the checkpoint on first run.
model = whisper.load_model("base")

# Emotion classifier over the 28 go_emotions labels (PyTorch backend).
sentiment_analysis = pipeline("sentiment-analysis", framework="pt", model="SamLowe/roberta-base-go_emotions")
|
|
|
def analyze_sentiment(text):
    """Classify *text* with the go_emotions pipeline.

    Returns a dict mapping each predicted emotion label to its score.
    """
    scores = {}
    for prediction in sentiment_analysis(text):
        scores[prediction['label']] = prediction['score']
    return scores
|
|
|
def get_sentiment_emoji(sentiment):
    """Map a go_emotions label to a display emoji; "" for unknown labels.

    The original literals were mojibake (UTF-8 emoji bytes re-decoded through
    a legacy single-byte codepage, e.g. "love" had become Thai characters).
    They are restored here as escape sequences — presumably the intended
    mapping; confirm individual choices against the original notebook.
    """
    emoji_mapping = {
        "disappointment": "\U0001F61E",  # 😞
        "sadness": "\U0001F622",         # 😢
        "annoyance": "\U0001F620",       # 😠
        "neutral": "\U0001F610",         # 😐
        "disapproval": "\U0001F44E",     # 👎
        "realization": "\U0001F62E",     # 😮
        "nervousness": "\U0001F62C",     # 😬
        "approval": "\U0001F44D",        # 👍
        "joy": "\U0001F604",             # 😄
        "anger": "\U0001F621",           # 😡
        "embarrassment": "\U0001F633",   # 😳
        "caring": "\U0001F917",          # 🤗
        "remorse": "\U0001F614",         # 😔
        "disgust": "\U0001F922",         # 🤢
        "grief": "\U0001F625",           # 😥
        "confusion": "\U0001F615",       # 😕
        "relief": "\U0001F60C",          # 😌
        "desire": "\U0001F60D",          # 😍
        "admiration": "\U0001F60C",      # 😌
        "optimism": "\U0001F60A",        # 😊
        "fear": "\U0001F628",            # 😨
        "love": "\u2764\uFE0F",          # ❤️
        "excitement": "\U0001F389",      # 🎉
        "curiosity": "\U0001F914",       # 🤔
        "amusement": "\U0001F604",       # 😄
        "surprise": "\U0001F632",        # 😲
        "gratitude": "\U0001F64F",       # 🙏
        "pride": "\U0001F981"            # 🦁
    }
    # Unrecognized labels render with no emoji rather than raising.
    return emoji_mapping.get(sentiment, "")
|
|
|
def display_sentiment_results(sentiment_results, option):
    """Render {label: score} as newline-terminated text lines.

    option selects the format: "Sentiment Only" shows label + emoji,
    "Sentiment + Score" appends the raw score. Any other option yields "".
    """
    lines = []
    for label, score in sentiment_results.items():
        emoji = get_sentiment_emoji(label)
        if option == "Sentiment + Score":
            lines.append(f"{label} {emoji}: {score}\n")
        elif option == "Sentiment Only":
            lines.append(f"{label} {emoji}\n")
    return "".join(lines)
|
|
|
def inference(audio, sentiment_option):
    """Transcribe an audio file, detect its language, and analyze sentiment.

    audio: path to the recorded file (gr.Audio is configured with
        type="filepath").
    sentiment_option: "Sentiment Only" or "Sentiment + Score".
    Returns (detected language code upper-cased, transcription text,
    formatted sentiment string) — matching the three output widgets.
    """
    # Load raw samples and pad/trim to Whisper's fixed 30-second window.
    audio = whisper.load_audio(audio)

    audio = whisper.pad_or_trim(audio)

    # Log-Mel spectrogram, moved to whichever device the model is on.
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # Language ID: pick the most probable language key.
    _, probs = model.detect_language(mel)

    lang = max(probs, key=probs.get)

    # fp16=False keeps decoding in float32 (required when running on CPU).
    options = whisper.DecodingOptions(fp16=False)

    result = whisper.decode(model, mel, options)

    # Sentiment on the transcription, formatted per the UI option.
    sentiment_results = analyze_sentiment(result.text)

    sentiment_output = display_sentiment_results(sentiment_results, sentiment_option)

    return lang.upper(), result.text, sentiment_output
|
|
|
title = """<h1 align="center">Audio Sentiment Analysis</h1>""" |
|
subtitle = """<h6 align="center">Automatic Speech Recognition</h6>""" |
|
image_path = "/Users/rayespinoza/PycharmProjects/AnalyticsProjects/Styles/Arquitecture.jpg" |
|
description = """ |
|
<p align="justify">With cross-modal interaction and AI (tools and pre-trained models in NLP), we can analyze large audio data |
|
in real-time, such as recorded conversations, customer service calls, or voice recordings, in order to identify and categorize |
|
emotions (from positive and neutral to sad and angry.</p><br> |
|
|
|
Components of the tool:<br> |
|
- Input: Real-time multilingual<br> |
|
- Video Call speech recognition<br> |
|
- Pre-trained model: Whisper<br> |
|
- Model size: Large with 769M Parameters<br> |
|
- Encoder/Decoder Arquitecture <br> |
|
- Transcribe, Translate, and Identify Audio<br> |
|
- Output: Sentiment analysis<br> |
|
<br> |
|
""" |
|
|
|
custom_css = """ |
|
banner-image { |
|
margin-left: auto; |
|
margin-right: auto; |
|
} |
|
chat-message { |
|
font-size: 300px; |
|
min-height: 600px; |
|
} |
|
|
|
img { |
|
border-radius: 8px; |
|
max-width: 100%; |
|
height: auto; |
|
} |
|
|
|
""" |
|
|
|
|
|
|
|
class Seafoam(Base):
    """Custom gradio theme: emerald/blue palette on a striped background."""

    def __init__(
        self,
        *,
        primary_hue: colors.Color | str = colors.emerald,
        secondary_hue: colors.Color | str = colors.blue,
        neutral_hue: colors.Color | str = colors.blue,
        spacing_size: sizes.Size | str = sizes.spacing_md,
        radius_size: sizes.Size | str = sizes.radius_md,
        text_size: sizes.Size | str = sizes.text_lg,
        font: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("Quicksand"),
            "ui-sans-serif",
            "sans-serif",
        ),
        font_mono: fonts.Font
        | str
        | Iterable[fonts.Font | str] = (
            fonts.GoogleFont("IBM Plex Mono"),
            "ui-monospace",
            "monospace",
        ),
    ):
        # Let Base resolve the palette/size/font choices first.
        super().__init__(
            primary_hue=primary_hue,
            secondary_hue=secondary_hue,
            neutral_hue=neutral_hue,
            spacing_size=spacing_size,
            radius_size=radius_size,
            text_size=text_size,
            font=font,
            font_mono=font_mono,
        )
        # Visual overrides layered on top of the Base defaults; "*name"
        # tokens are gradio theme variable references.
        overrides = dict(
            body_background_fill="repeating-linear-gradient(45deg, *primary_200, *primary_200 10px, *primary_50 10px, *primary_50 20px)",
            body_background_fill_dark="repeating-linear-gradient(45deg, *primary_800, *primary_800 10px, *primary_900 10px, *primary_900 20px)",
            button_primary_background_fill="linear-gradient(90deg, *primary_300, *secondary_400)",
            button_primary_background_fill_hover="linear-gradient(90deg, *primary_200, *secondary_300)",
            button_primary_text_color="white",
            button_primary_background_fill_dark="linear-gradient(90deg, *primary_600, *secondary_800)",
            slider_color="*secondary_300",
            slider_color_dark="*secondary_600",
            block_title_text_weight="600",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
            button_shadow="*shadow_drop_lg",
            button_large_padding="32px",
        )
        super().set(**overrides)
|
|
|
|
|
# Theme instance.
# NOTE(review): unused — gr.Blocks below is created with
# theme='gradio/default' instead of theme=seafoam; confirm which is intended.
seafoam = Seafoam()

# Unicode symbols for the toggle helpers below.
lock_symbol = '\U0001F512'  # locked padlock

unlock_symbol = '\U0001F513'  # open padlock

switch_values_symbol = '\U000021C5'  # up/down arrows
|
|
|
# NOTE(review): FormRow is never instantiated in this script — looks like
# copied UI-helper code; confirm before removing.
class FormRow(gr.Row, gr.components.FormComponent):

    """Same as gr.Row but fits inside gradio forms"""

    def get_block_name(self):
        # gradio resolves the frontend component from this name.
        return "row"
|
|
|
# NOTE(review): ToolButton is never instantiated in this script — looks like
# copied UI-helper code; confirm before removing.
class ToolButton(gr.Button, gr.components.FormComponent):

    """Small button with single emoji as text, fits inside gradio forms"""

    def __init__(self, **kwargs):
        # Force the compact "tool" variant; all other kwargs pass through.
        super().__init__(variant="tool", **kwargs)

    def get_block_name(self):
        # gradio resolves the frontend component from this name.
        return "button"
|
|
|
def toggle_aspect_ratio(btn):
    """Flip a lock/unlock toggle button between its two visual states.

    btn is the button's current label; returns a gr.update switching the
    label and variant to the opposite state.
    """
    locking = btn == unlock_symbol
    next_label = lock_symbol if locking else unlock_symbol
    next_variant = "primary" if locking else "secondary"
    return gr.update(value=next_label, variant=next_variant)
|
|
|
|
|
|
|
|
|
# Read the external stylesheet.
# NOTE(review): css_app is never used — gr.Blocks below receives custom_css
# instead; confirm which stylesheet is intended. This read also raises
# FileNotFoundError if styles.css is absent from the working directory.
with open('styles.css', 'r') as f:

    css_app = f.read()

# Root Blocks container; note the seafoam theme defined above is not used.
block = gr.Blocks(css=custom_css, theme='gradio/default',title="Analytics Projects by Ray Espinoza")
|
|
|
|
|
|
|
|
|
# --- UI layout: header, banner + description, then the ASR/sentiment form ---
with block:

    gr.HTML(title)

    gr.HTML(subtitle)

    # Banner image beside the project description.
    with gr.Row():

        with gr.Column(scale=2):

            gr.Image(image_path, elem_id="banner-image", show_label=False, show_download_button=False)

        with gr.Column():

            gr.HTML(description)

    with gr.Group():

        with gr.Box():

            # Microphone input; delivered to inference() as a file path.
            audio = gr.Audio(

                label="Input Audio",

                show_label=False,

                source="microphone",

                type="filepath"

            )

            # Output-format selector consumed by display_sentiment_results().
            sentiment_option = gr.Radio(

                choices=["Sentiment Only", "Sentiment + Score"],

                label="Select an option",

                default="Sentiment Only"

            )

        btn = gr.Button("Execute: Transcribe",variant="primary")

        # Three output widgets matching inference()'s 3-tuple return.
        lang_str = gr.Textbox(label="Language:")

        text = gr.Textbox(label="Transcription:")

        sentiment_output = gr.Textbox(label="Sentiment Analysis Results:", output=True)

        # Wire the button to the full pipeline: ASR -> language ID -> sentiment.
        btn.click(inference, inputs=[audio, sentiment_option], outputs=[lang_str, text, sentiment_output])

    gr.HTML('''

    <div class="footer">

        <p>By <a href="https://github.com" style="text-decoration: underline;" target="_blank"> Ray EH Github</a>

        </p>

    </div>

    ''')

# Start the local server (blocking call).
block.launch()