# HappyPulse / app.py
import gradio as gr
import json
import soundfile as sf
import torchaudio
import numpy as np
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
# 1. Portuguese ASR model
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-portuguese",  # Speech-to-text for Portuguese
)
# 2. Portuguese sentiment analysis model
MODEL_NAME = "lipaoMai/BERT-sentiment-analysis-portuguese" # BERT model for Portuguese sentiment analysis
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)
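# For reference, the sentiment pipeline returns a list with one dict per input,
# e.g. [{'label': ..., 'score': ...}]. The exact label strings depend on this
# model's config, so the values below are only an illustrative assumption:
#   sentiment_pipeline("Estou muito feliz!")  # -> [{'label': 'positive', 'score': 0.98}]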
def transcribe_audio(audio_file):
    """Transcribe the audio, ensuring the sample rate and format the model expects."""
    # Load the audio as float32 and get its sample rate
    audio_data, sample_rate = sf.read(audio_file, dtype="float32")
    # If the audio is stereo, average the channels down to mono
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)
    # Convert to a float32 PyTorch tensor before resampling
    audio_tensor = torch.tensor(audio_data, dtype=torch.float32)
    # Resample to the 16 kHz rate the wav2vec2 model was trained on
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        audio_tensor = resampler(audio_tensor)
        sample_rate = 16000  # Update the sample rate
    # Pass the raw waveform and its sample rate to the ASR pipeline
    result = asr_pipeline({"raw": audio_tensor.numpy(), "sampling_rate": sample_rate})
    return result.get("text", "")
def analyze_sentiment(text):
    """Sentiment analysis for Portuguese text; returns the predicted label and its confidence as a percentage."""
    result = sentiment_pipeline(text)
    label = result[0]["label"]
    happiness_score = round(result[0]["score"] * 100, 2)  # Model confidence, scaled to 0-100
    return label, happiness_score
def process_audio(audio_file):
    """Run the full pipeline: transcribe an audio file, then analyze the sentiment of the transcript."""
    transcript = transcribe_audio(audio_file)
    label, happiness = analyze_sentiment(transcript)
    return transcript, happiness
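# Example usage (hypothetical file path):
#   transcript, happiness = process_audio("call_recording.wav")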
def interface_audio(audio):
    transcript, happiness = process_audio(audio)
    return transcript, f"Happiness Score: {happiness}%"
def process_text_log(json_file):
    """
    Process a JSON file containing chat logs.
    The JSON must be a list of messages, where each item has a 'message' or 'text' key.
    Applies sentiment analysis to each message and returns a summary.
    """
    try:
        with open(json_file, "r", encoding="utf-8") as f:  # Close the file handle when done
            data = json.load(f)
    except Exception as e:
        return f"Error reading JSON file: {e}", 0, 0
    total_messages = 0
    soma_happiness = 0
    detalhes = ""
    for item in data:
        text = item.get("message") or item.get("text")
        if text:
            total_messages += 1
            label, happiness_score = analyze_sentiment(text)
            soma_happiness += happiness_score
            detalhes += f"Message: {text}\nSentiment: {label}, Happiness: {happiness_score}%\n\n"
    overall_happiness = round(soma_happiness / total_messages, 2) if total_messages > 0 else 0
    summary = f"Total messages: {total_messages}\nHappiness Score: {overall_happiness}%\n\nDetails:\n" + detalhes
    return summary, total_messages, overall_happiness
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Analyze Audio and Chat Logs")
    with gr.Tabs():
        with gr.TabItem("Audio"):
            gr.Markdown("## Audio Analysis")
            # Allow both recording and uploading, as the label and instructions promise
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio")
            transcript_output = gr.Textbox(label="Transcript")
            kpi_output = gr.Textbox(label="Happiness Score")
            audio_btn = gr.Button("Process Audio")
            audio_btn.click(interface_audio, inputs=audio_input, outputs=[transcript_output, kpi_output])
        with gr.TabItem("Chat Logs"):
            gr.Markdown("## Chat Logs (JSON)")
            gr.Markdown("Upload a JSON file with a chat log. Each item should have a 'message' or 'text' key.")
            json_input = gr.File(label="JSON File")
            log_summary_output = gr.Textbox(label="Summary", lines=10)
            total_messages_output = gr.Number(label="Total Messages")
            overall_happiness_output = gr.Number(label="Happiness Score (%)")
            json_btn = gr.Button("Process Chat Log")
            json_btn.click(process_text_log, inputs=json_input, outputs=[log_summary_output, total_messages_output, overall_happiness_output])
gr.Markdown("""
# 🎙️ Welcome to HappyPulse!
## 🚀 How It Works
HappyPulse is a sentiment analysis tool that allows you to record your voice or upload an audio file and analyze the emotions behind your speech. Using AI-powered transcription and sentiment analysis, we provide insights into the emotional tone of your communication.
## 🎤 Try It Now!
### **Audio Analysis** 🎧
- Click **Record** and speak naturally, expressing different emotions.
- Upload an audio file if you prefer.
- Our AI will transcribe your speech and analyze its sentiment.
- Get instant feedback on the emotional tone detected!
### **Chat Log Analysis** 💬
- Upload a JSON file containing chat conversations.
- The system will analyze each message's sentiment.
- Understand customer emotions and improve interactions.
## 📊 Why Use HappyPulse?
✅ Understand emotional tones in conversations.
✅ Improve customer service by detecting frustration in real-time.
✅ Gain insights into communication effectiveness.
Give it a try and see how emotions influence speech and text! 🚀
""")
demo.launch()