Spaces:
Runtime error
Runtime error
import gradio as gr | |
import json | |
import soundfile as sf | |
import torchaudio | |
import numpy as np | |
import torch | |
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer | |
# Speech-to-text: Portuguese automatic speech recognition pipeline.
asr_pipeline = pipeline(
    task="automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-portuguese",
)

# Sentiment analysis: BERT checkpoint for Portuguese text. The tokenizer and
# the model are loaded explicitly from the same checkpoint and then wrapped
# in a single pipeline object.
MODEL_NAME = "lipaoMai/BERT-sentiment-analysis-portuguese"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline(
    task="sentiment-analysis",
    model=sentiment_model,
    tokenizer=tokenizer,
)
def transcribe_audio(audio_file):
    """Transcribe an audio file to text with the ASR pipeline.

    The audio is read as float32, averaged down to a single channel when it
    has more than one dimension, and resampled to 16 kHz when its sample
    rate differs, before being handed to ``asr_pipeline`` as a NumPy array.

    Args:
        audio_file: Audio source passed straight to ``soundfile.read``.

    Returns:
        The ``"text"`` entry of the pipeline result, or ``""`` if the
        result has no such entry.
    """
    target_rate = 16000
    samples, source_rate = sf.read(audio_file, dtype="float32")

    # Multi-channel input (e.g. stereo): average over axis 1 to get mono.
    if samples.ndim > 1:
        samples = np.mean(samples, axis=1)

    # Resampling works on tensors, so go through a float32 torch tensor.
    waveform = torch.tensor(samples, dtype=torch.float32)
    if source_rate != target_rate:
        to_target_rate = torchaudio.transforms.Resample(
            orig_freq=source_rate, new_freq=target_rate
        )
        waveform = to_target_rate(waveform)

    # The pipeline is fed a NumPy array, so convert back before the call.
    transcription = asr_pipeline(waveform.numpy())
    return transcription.get("text", "")
def analyze_sentiment(text):
    """Score Portuguese text with the sentiment pipeline.

    Args:
        text: The text to classify.

    Returns:
        The ``score`` of the first prediction, scaled to a 0-100 percentage.
    """
    # NOTE(review): the predicted label is not inspected, so the value is
    # the score of whichever label the pipeline returned first -- confirm
    # this matches the intended meaning of "happiness".
    top_prediction = sentiment_pipeline(text)[0]
    return top_prediction['score'] * 100
def process_audio(audio_file):
    """Transcribe an audio file and score the sentiment of its transcript.

    Args:
        audio_file: Audio source forwarded to ``transcribe_audio``.

    Returns:
        A ``(transcript, happiness)`` tuple, where ``happiness`` is the
        value returned by ``analyze_sentiment`` for the transcript.
    """
    text = transcribe_audio(audio_file)
    return text, analyze_sentiment(text)
def interface_audio(audio):
    """Gradio callback for the audio tab.

    Args:
        audio: Path of the recorded/uploaded audio, or ``None``/empty when
            the button is pressed before any audio has been provided.

    Returns:
        A ``(transcript, message)`` tuple for the two output textboxes.
        When no audio was provided the transcript is empty and the message
        says so, instead of failing while trying to read a missing file.
    """
    # Guard the empty case up front: without it the audio reader is called
    # with None and the user only sees a generic error.
    if not audio:
        return "", "No audio provided."

    transcript, happiness = process_audio(audio)
    return transcript, f"Happiness Score: {happiness}%"
def process_text_log(json_file):
    """Run sentiment analysis over a JSON chat log and summarise it.

    The JSON document must be a list of messages; each message is a mapping
    with its text under the key ``'message'`` or ``'text'``. Items that are
    not mappings, or that have no non-empty string text, are skipped.

    Args:
        json_file: Path to the JSON file, or ``None`` when nothing was
            uploaded.

    Returns:
        A ``(summary, total_messages, overall_happiness)`` tuple. On any
        problem reading the file the summary is an error message starting
        with ``"Error reading JSON file:"`` and both numbers are ``0``.
    """
    if json_file is None:
        return "Error reading JSON file: no file was provided", 0, 0

    try:
        # Context manager closes the handle; UTF-8 is the JSON default and
        # keeps accented Portuguese text intact regardless of locale.
        with open(json_file, encoding="utf-8") as handle:
            data = json.load(handle)
    except (OSError, TypeError, ValueError) as e:
        # ValueError covers both malformed JSON and undecodable bytes.
        return f"Error reading JSON file: {e}", 0, 0

    if not isinstance(data, list):
        found = type(data).__name__
        return (
            f"Error reading JSON file: expected a list of messages, got {found}",
            0,
            0,
        )

    total_messages = 0
    happiness_sum = 0
    details = []
    for item in data:
        if not isinstance(item, dict):
            continue
        text = item.get("message") or item.get("text")
        if not text or not isinstance(text, str):
            continue

        # Query the pipeline directly: both the label and the score are
        # needed here, and analyze_sentiment() only returns the score.
        prediction = sentiment_pipeline(text)[0]
        label = prediction["label"]
        happiness_score = prediction["score"] * 100

        total_messages += 1
        happiness_sum += happiness_score
        details.append(
            f"Message: {text}\n"
            f"Sentiment: {label}, Happiness: {happiness_score:.2f}%\n\n"
        )

    overall_happiness = (
        round(happiness_sum / total_messages, 2) if total_messages > 0 else 0
    )
    summary = (
        f"Total messages: {total_messages}\n"
        f"Happiness Score: {overall_happiness}%\n\nDetails:\n"
        + "".join(details)
    )
    return summary, total_messages, overall_happiness
# Build the Gradio interface: one tab for audio, one for JSON chat logs,
# followed by a static help/welcome section.
with gr.Blocks() as demo:
    gr.Markdown("# Analyze Audio and Chat Logs")
    with gr.Tabs():
        with gr.TabItem("Audio"):
            gr.Markdown("## Audio Analysis")
            # Both sources are enabled so the component matches its label
            # and the help text below, which both offer uploading a file.
            audio_input = gr.Audio(
                sources=["microphone", "upload"],
                type="filepath",
                label="Record or Upload Audio",
            )
            transcript_output = gr.Textbox(label="Transcript")
            kpi_output = gr.Textbox(label="Happiness Score")
            audio_btn = gr.Button("Process Audio")
            audio_btn.click(
                interface_audio,
                inputs=audio_input,
                outputs=[transcript_output, kpi_output],
            )
        with gr.TabItem("Chat Logs"):
            gr.Markdown("## Chat logs (JSON)")
            gr.Markdown("Upload a JSON file with a chat log. Each item should have a 'message' or 'text' key.")
            json_input = gr.File(label="JSON File")
            log_summary_output = gr.Textbox(label="Summary", lines=10)
            total_messages_output = gr.Number(label="Total Messages")
            overall_happiness_output = gr.Number(label="Happiness Score (%)")
            json_btn = gr.Button("Process Chat Log")
            json_btn.click(
                process_text_log,
                inputs=json_input,
                outputs=[
                    log_summary_output,
                    total_messages_output,
                    overall_happiness_output,
                ],
            )
    # Help text covers both tabs, so it sits below the tab container.
    # NOTE(review): original nesting of this section was not recoverable
    # from the source -- confirm the intended position.
    gr.Markdown("""
# 🎙️ Welcome to HappyPulse!
## 🚀 How It Works
HappyPulse is a sentiment analysis tool that allows you to record your voice or upload an audio file and analyze the emotions behind your speech. Using AI-powered transcription and sentiment analysis, we provide insights into the emotional tone of your communication.
## 🎤 Try It Now!
### **Audio Analysis** 🎧
- Click **Record** and speak naturally, expressing different emotions.
- Upload an audio file if you prefer.
- Our AI will transcribe your speech and analyze its sentiment.
- Get instant feedback on the emotional tone detected!
### **Chat Log Analysis** 💬
- Upload a JSON file containing chat conversations.
- The system will analyze each message's sentiment.
- Understand customer emotions and improve interactions.
## 📊 Why Use HappyPulse?
✅ Understand emotional tones in conversations.
✅ Improve customer service by detecting frustration in real-time.
✅ Gain insights into communication effectiveness.
Give it a try and see how emotions influence speech and text! 🚀
""")

demo.launch()