import gradio as gr
import json
import soundfile as sf
import torchaudio
import numpy as np
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer
# 1. Portuguese ASR model
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-portuguese",  # Speech-to-text for Portuguese
)
# 2. Portuguese sentiment analysis model
MODEL_NAME = "lipaoMai/BERT-sentiment-analysis-portuguese" # BERT model for Portuguese sentiment analysis
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)
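# Note: sentiment_pipeline returns a list of dicts such as
# [{"label": "positive", "score": 0.98}] -- the exact label strings
# depend on this model's config and are assumed here, not verified.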
def transcribe_audio(audio_file):
    """Transcribe the audio, ensuring the sample rate and format expected by the model."""
    # Load the audio and its sample rate, forcing float32 samples
    audio_data, sample_rate = sf.read(audio_file, dtype="float32")
    # If the audio is stereo, average the channels down to mono
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)
    # Convert to a float32 PyTorch tensor before resampling
    audio_tensor = torch.tensor(audio_data, dtype=torch.float32)
    # Resample to 16 kHz if needed (the rate the wav2vec2 model expects)
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        audio_tensor = resampler(audio_tensor)
        sample_rate = 16000  # Update the sample rate
    # Run the ASR pipeline (convert back to NumPy before passing it to the model)
    result = asr_pipeline(audio_tensor.numpy())
    return result.get("text", "")
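# Illustrative usage (hypothetical file name and output):
# transcribe_audio("amostra.wav")  # -> "olá, tudo bem?"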
def analyze_sentiment(text):
    """Sentiment analysis for Portuguese text; returns (label, happiness score in %)."""
    result = sentiment_pipeline(text)
    label = result[0]["label"]
    happiness_score = round(result[0]["score"] * 100, 2)
    return label, happiness_score
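# Illustrative usage (label strings and scores are model-dependent):
# analyze_sentiment("Adorei o atendimento!")  # -> ("positive", 97.31)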
def process_audio(audio_file):
    """
    Run the full pipeline: transcribe an audio file and analyze its sentiment.
    """
    transcript = transcribe_audio(audio_file)
    _, happiness = analyze_sentiment(transcript)
    return transcript, happiness

def interface_audio(audio):
    transcript, happiness = process_audio(audio)
    return transcript, f"Happiness Score: {happiness}%"
def process_text_log(json_file):
    """
    Process a JSON file containing chat logs.
    The JSON must be a list of messages, where each item has a 'message' or 'text' key.
    Applies sentiment analysis to each message and returns a summary.
    """
    try:
        # gr.File may hand us a path string or a file wrapper, depending on the Gradio version
        path = json_file.name if hasattr(json_file, "name") else json_file
        with open(path) as f:
            data = json.load(f)
    except Exception as e:
        return f"Error reading JSON file: {e}", 0, 0
    total_messages = 0
    soma_happiness = 0
    detalhes = ""
    for item in data:
        text = item.get("message") or item.get("text")
        if text:
            total_messages += 1
            label, happiness_score = analyze_sentiment(text)
            soma_happiness += happiness_score
            detalhes += f"Message: {text}\nSentiment: {label}, Happiness: {happiness_score}%\n\n"
    overall_happiness = round(soma_happiness / total_messages, 2) if total_messages > 0 else 0
    summary = f"Total messages: {total_messages}\nHappiness Score: {overall_happiness}%\n\nDetails:\n" + detalhes
    return summary, total_messages, overall_happiness
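# Expected chat-log format (illustrative values):
# [
#     {"message": "Olá, tudo bem?"},
#     {"text": "Estou muito satisfeito com o atendimento!"}
# ]
# Usage sketch (hypothetical path):
# summary, n_messages, score = process_text_log("chat_log.json")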
# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Analyze Audio and Chat Logs")
    with gr.Tabs():
        with gr.TabItem("Audio"):
            gr.Markdown("## Audio Analysis")
            # Allow both recording and uploading, as the label and help text promise
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio")
            transcript_output = gr.Textbox(label="Transcript")
            kpi_output = gr.Textbox(label="Happiness Score")
            audio_btn = gr.Button("Process Audio")
            audio_btn.click(interface_audio, inputs=audio_input, outputs=[transcript_output, kpi_output])
        with gr.TabItem("Chat Logs"):
            gr.Markdown("## Chat Logs (JSON)")
            gr.Markdown("Upload a JSON file with a chat log. Each item should have a 'message' or 'text' key.")
            json_input = gr.File(label="JSON File")
            log_summary_output = gr.Textbox(label="Summary", lines=10)
            total_messages_output = gr.Number(label="Total Messages")
            overall_happiness_output = gr.Number(label="Happiness Score (%)")
            json_btn = gr.Button("Process Chat Log")
            json_btn.click(process_text_log, inputs=json_input, outputs=[log_summary_output, total_messages_output, overall_happiness_output])
gr.Markdown("""
# 🎙️ Welcome to HappyPulse!
## 🚀 How It Works
HappyPulse is a sentiment analysis tool that allows you to record your voice or upload an audio file and analyze the emotions behind your speech. Using AI-powered transcription and sentiment analysis, we provide insights into the emotional tone of your communication.
## 🎤 Try It Now!
### **Audio Analysis** 🎧
- Click **Record** and speak naturally, expressing different emotions.
- Upload an audio file if you prefer.
- Our AI will transcribe your speech and analyze its sentiment.
- Get instant feedback on the emotional tone detected!
### **Chat Log Analysis** 💬
- Upload a JSON file containing chat conversations.
- The system will analyze each message's sentiment.
- Understand customer emotions and improve interactions.
## 📊 Why Use HappyPulse?
✅ Understand emotional tones in conversations.
✅ Improve customer service by detecting frustration in real-time.
✅ Gain insights into communication effectiveness.
Give it a try and see how emotions influence speech and text! 🚀
""")
demo.launch()