import gradio as gr
import json
import soundfile as sf
import torchaudio
import numpy as np
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
# 1. Portuguese ASR model
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-portuguese",  # Speech-to-text for Portuguese
)
# 2. Portuguese sentiment analysis model
MODEL_NAME = "lipaoMai/BERT-sentiment-analysis-portuguese" # BERT model for Portuguese sentiment analysis
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)
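# Note: sentiment_pipeline returns a list of dicts such as
# [{"label": "positive", "score": 0.98}] -- the exact label strings
# depend on this model's config and are assumed here, not verified.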
def transcribe_audio(audio_file):
    """Transcribe the audio, ensuring the sample rate and format expected by the model."""
    # Load the audio and its sample rate, forcing float32 samples
    audio_data, sample_rate = sf.read(audio_file, dtype="float32")
    # If the audio is stereo, average the channels down to mono
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)
    # Convert to a float32 PyTorch tensor before resampling
    audio_tensor = torch.tensor(audio_data, dtype=torch.float32)
    # Resample to 16 kHz if needed (the rate the wav2vec2 model expects)
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        audio_tensor = resampler(audio_tensor)
        sample_rate = 16000  # Update the sample rate
    # Run the ASR pipeline (convert back to NumPy before passing it to the model)
    result = asr_pipeline(audio_tensor.numpy())
    return result.get("text", "")
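# Illustrative usage (hypothetical file name and output):
# transcribe_audio("amostra.wav")  # -> "olá, tudo bem?"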
def analyze_sentiment(text):
    """Sentiment analysis for Portuguese text; returns (label, happiness score in %)."""
    result = sentiment_pipeline(text)
    label = result[0]["label"]
    happiness_score = round(result[0]["score"] * 100, 2)
    return label, happiness_score
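# Illustrative usage (label strings and scores are model-dependent):
# analyze_sentiment("Adorei o atendimento!")  # -> ("positive", 97.31)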
def process_audio(audio_file):
    """
    Run the full pipeline: transcribe an audio file and analyze its sentiment.
    """
    transcript = transcribe_audio(audio_file)
    _, happiness = analyze_sentiment(transcript)
    return transcript, happiness

def interface_audio(audio):
    transcript, happiness = process_audio(audio)
    return transcript, f"Happiness Score: {happiness}%"
def process_text_log(json_file):
    """
    Process a JSON file containing chat logs.
    The JSON must be a list of messages, where each item has a 'message' or 'text' key.
    Applies sentiment analysis to each message and returns a summary.
    """
    try:
        # gr.File may hand us a path string or a file wrapper, depending on the Gradio version
        path = json_file.name if hasattr(json_file, "name") else json_file
        with open(path) as f:
            data = json.load(f)
    except Exception as e:
        return f"Error reading JSON file: {e}", 0, 0
    total_messages = 0
    soma_happiness = 0
    detalhes = ""
    for item in data:
        text = item.get("message") or item.get("text")
        if text:
            total_messages += 1
            label, happiness_score = analyze_sentiment(text)
            soma_happiness += happiness_score
            detalhes += f"Message: {text}\nSentiment: {label}, Happiness: {happiness_score}%\n\n"
    overall_happiness = round(soma_happiness / total_messages, 2) if total_messages > 0 else 0
    summary = f"Total messages: {total_messages}\nHappiness Score: {overall_happiness}%\n\nDetails:\n" + detalhes
    return summary, total_messages, overall_happiness
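# Expected chat-log format (illustrative values):
# [
#     {"message": "Olá, tudo bem?"},
#     {"text": "Estou muito satisfeito com o atendimento!"}
# ]
# Usage sketch (hypothetical path):
# summary, n_messages, score = process_text_log("chat_log.json")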
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Analyze Audio and Chat Logs")
    with gr.Tabs():
        with gr.TabItem("Audio"):
            gr.Markdown("## Audio Analysis")
            # Allow both recording and uploading, as the label and help text promise
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio")
            transcript_output = gr.Textbox(label="Transcript")
            kpi_output = gr.Textbox(label="Happiness Score")
            audio_btn = gr.Button("Process Audio")
            audio_btn.click(interface_audio, inputs=audio_input, outputs=[transcript_output, kpi_output])
        with gr.TabItem("Chat Logs"):
            gr.Markdown("## Chat Logs (JSON)")
            gr.Markdown("Upload a JSON file with a chat log. Each item should have a 'message' or 'text' key.")
            json_input = gr.File(label="JSON File")
            log_summary_output = gr.Textbox(label="Summary", lines=10)
            total_messages_output = gr.Number(label="Total Messages")
            overall_happiness_output = gr.Number(label="Happiness Score (%)")
            json_btn = gr.Button("Process Chat Log")
            json_btn.click(process_text_log, inputs=json_input, outputs=[log_summary_output, total_messages_output, overall_happiness_output])
gr.Markdown("""
# 🎙️ Welcome to HappyPulse!
## 🚀 How It Works
HappyPulse is a sentiment analysis tool that allows you to record your voice or upload an audio file and analyze the emotions behind your speech. Using AI-powered transcription and sentiment analysis, we provide insights into the emotional tone of your communication.
## 🎤 Try It Now!
### **Audio Analysis** 🎧
- Click **Record** and speak naturally, expressing different emotions.
- Upload an audio file if you prefer.
- Our AI will transcribe your speech and analyze its sentiment.
- Get instant feedback on the emotional tone detected!
### **Chat Log Analysis** 💬
- Upload a JSON file containing chat conversations.
- The system will analyze each message's sentiment.
- Understand customer emotions and improve interactions.
## 📊 Why Use HappyPulse?
✅ Understand emotional tones in conversations.
✅ Improve customer service by detecting frustration in real-time.
✅ Gain insights into communication effectiveness.
Give it a try and see how emotions influence speech and text! 🚀
""")
demo.launch()