import gradio as gr
import json
import numpy as np
import soundfile as sf
import torch
import torchaudio
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# 1. Portuguese ASR model
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-portuguese",  # Speech-to-text for Portuguese
)

# 2. Portuguese sentiment analysis model
MODEL_NAME = "lipaoMai/BERT-sentiment-analysis-portuguese"  # BERT model for Portuguese sentiment analysis
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)


def transcribe_audio(audio_file):
    """Transcribe the audio, ensuring the sample rate and format the model expects."""
    # Load the audio and its sample rate, forcing float32 samples for the model
    audio_data, sample_rate = sf.read(audio_file, dtype="float32")

    # If the audio is stereo, average the channels down to mono
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    # Convert to a float32 PyTorch tensor before resampling
    audio_tensor = torch.tensor(audio_data, dtype=torch.float32)

    # Resample to the 16 kHz the wav2vec2 model was trained on, if needed
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        audio_tensor = resampler(audio_tensor)
        sample_rate = 16000  # Update the sample rate

    # Hand the audio to the ASR pipeline (back to NumPy), stating the sample rate explicitly
    result = asr_pipeline({"raw": audio_tensor.numpy(), "sampling_rate": sample_rate})
    return result.get("text", "")


def analyze_sentiment(text):
    """Sentiment analysis for Portuguese text.

    Returns the predicted label and the model's confidence in that label,
    expressed as a percentage.
    """
    result = sentiment_pipeline(text)
    label = result[0]["label"]
    # Note: this is the confidence in the predicted label, used here as a happiness proxy
    happiness_score = result[0]["score"] * 100
    return label, happiness_score


def process_audio(audio_file):
    """Run the full pipeline: transcribe an audio file, then score its sentiment."""
    transcript = transcribe_audio(audio_file)
    _, happiness = analyze_sentiment(transcript)
    return transcript, happiness


def interface_audio(audio):
    transcript, happiness = process_audio(audio)
    return transcript, f"Happiness Score: {happiness:.2f}%"
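
# Illustrative usage of analyze_sentiment (not run on import; the exact label
# strings depend on the sentiment checkpoint's config, so this output is assumed):
#
#   label, score = analyze_sentiment("Adorei o produto, chegou rapidinho!")
#   print(label, f"{score:.1f}%")  # e.g. "positive 97.3%"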
""" try: data = json.load(open(json_file)) except Exception as e: return f"Error reading JSON file: {e}", 0, 0 total_messages = 0 soma_happiness = 0 detalhes = "" for item in data: text = item.get("message") or item.get("text") if text: total_messages += 1 label, happiness_score = analyze_sentiment(text) soma_happiness += happiness_score detalhes += f"Message: {text}\nSentiment: {label}, Happiness: {happiness_score}%\n\n" overall_happiness = round(soma_happiness / total_messages, 2) if total_messages > 0 else 0 summary = f"Total messages: {total_messages}\nHappiness Score: {overall_happiness}%\n\nDetails:\n" + detalhes return summary, total_messages, overall_happiness # Create the Gradio interface with gr.Blocks() as demo: gr.Markdown("# Analyze Audio and Chat Logs") with gr.Tabs(): with gr.TabItem("Audio"): gr.Markdown("## Audio Analysis") audio_input = gr.Audio(sources=["microphone"], type="filepath", label="Record or Upload Audio") transcript_output = gr.Textbox(label="Transcript") kpi_output = gr.Textbox(label="Happiness Score") audio_btn = gr.Button("Process Audio") audio_btn.click(interface_audio, inputs=audio_input, outputs=[transcript_output, kpi_output]) with gr.TabItem("Chat Logs"): gr.Markdown("## Chat logs (JSON)") gr.Markdown("Upload a JSON file with a chat log. Each item should have a 'message' or 'text' key.") json_input = gr.File(label="JSON File") log_summary_output = gr.Textbox(label="Summary", lines=10) total_messages_output = gr.Number(label="Total Messages") overall_happiness_output = gr.Number(label="Happiness Score (%)") json_btn = gr.Button("Process Chat Log") json_btn.click(process_text_log, inputs=json_input, outputs=[log_summary_output, total_messages_output, overall_happiness_output]) gr.Markdown(""" # 🎙️ Welcome to HappyPulse! ## 🚀 How It Works HappyPulse is a sentiment analysis tool that allows you to record your voice or upload an audio file and analyze the emotions behind your speech. Using AI-powered transcription and sentiment analysis, we provide insights into the emotional tone of your communication. ## 🎤 Try It Now! ### **Audio Analysis** 🎧 - Click **Record** and speak naturally, expressing different emotions. - Upload an audio file if you prefer. - Our AI will transcribe your speech and analyze its sentiment. - Get instant feedback on the emotional tone detected! ### **Chat Log Analysis** 💬 - Upload a JSON file containing chat conversations. - The system will analyze each message's sentiment. - Understand customer emotions and improve interactions. ## 📊 Why Use HappyPulse? ✅ Understand emotional tones in conversations. ✅ Improve customer service by detecting frustration in real-time. ✅ Gain insights into communication effectiveness. Give it a try and see how emotions influence speech and text! 🚀 """) demo.launch()