import gradio as gr
import json
import soundfile as sf
import torchaudio
import numpy as np
import torch
from transformers import pipeline, AutoModelForSequenceClassification, AutoTokenizer

# 1. Portuguese ASR model
asr_pipeline = pipeline(
    "automatic-speech-recognition",
    model="facebook/wav2vec2-large-xlsr-53-portuguese"  # Speech-to-text for Portuguese
)
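# Note: when given a bare 1-D float32 NumPy array, the transformers ASR
# pipeline assumes the audio is at the feature extractor's sampling rate
# (16 kHz for wav2vec2) and returns a dict of the form {"text": "..."}.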

# 2. Portuguese sentiment analysis model
MODEL_NAME = "lipaoMai/BERT-sentiment-analysis-portuguese"  # BERT model for Portuguese sentiment analysis
sentiment_model = AutoModelForSequenceClassification.from_pretrained(MODEL_NAME)
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
sentiment_pipeline = pipeline("sentiment-analysis", model=sentiment_model, tokenizer=tokenizer)
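# The pipeline returns a list with one dict per input, e.g.
# [{"label": "...", "score": 0.98}]; the exact label strings depend on this
# model's configuration.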

def transcribe_audio(audio_file):
    """Transcribe the audio, ensuring the sampling rate and format the model expects."""

    # Load the audio and get its sampling rate
    audio_data, sample_rate = sf.read(audio_file, dtype="float32")  # Ensure the samples are float32

    # If the audio is stereo, average the channels down to a mono 1-D array
    if len(audio_data.shape) > 1:
        audio_data = np.mean(audio_data, axis=1)

    # Convert to a PyTorch float32 tensor before resampling
    audio_tensor = torch.tensor(audio_data, dtype=torch.float32)

    # Resample to 16 kHz if necessary (the rate wav2vec2 models expect)
    if sample_rate != 16000:
        resampler = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
        audio_tensor = resampler(audio_tensor)
        sample_rate = 16000  # Update the sampling rate

    # Run the ASR pipeline (convert back to NumPy before passing it to the model)
    result = asr_pipeline(audio_tensor.numpy())

    return result.get("text", "")


def analyze_sentiment(text):
    """Sentiment analysis for Portuguese text.

    Returns the predicted label and the model's confidence in it as a
    percentage, which the app reports as the "happiness score".
    """
    result = sentiment_pipeline(text)
    label = result[0]['label']
    happiness_score = round(result[0]['score'] * 100, 2)
    return label, happiness_score
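
# Illustrative call (output values are made up; labels depend on the model):
#   analyze_sentiment("Adorei o atendimento!")  # -> ("positive", 97.42)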

def process_audio(audio_file):
    """
    Run the full pipeline: transcribe an audio file, then analyze the sentiment of the transcript.
    """
    transcript = transcribe_audio(audio_file)
    _, happiness = analyze_sentiment(transcript)
    return transcript, happiness

def interface_audio(audio):
    transcript, happiness = process_audio(audio)
    return transcript, f"Happiness Score: {happiness}%"

def process_text_log(json_file):
    """
    Process a JSON file containing chat logs.
    The JSON must be a list of messages, where each item has a 'message' or 'text' key,
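    for example (illustrative contents):

        [
            {"message": "Olá, tudo bem?"},
            {"text": "O atendimento foi ótimo!"}
        ]
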
    Applies sentiment analysis to each message and returns a summary.
    """
    try:
        with open(json_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except Exception as e:
        return f"Error reading JSON file: {e}", 0, 0

    if not isinstance(data, list):
        return "Error: the JSON must be a list of messages.", 0, 0
    
    total_messages = 0
    happiness_sum = 0
    details = ""

    for item in data:
        text = item.get("message") or item.get("text")
        if text:
            total_messages += 1
            label, happiness_score = analyze_sentiment(text)
            happiness_sum += happiness_score
            details += f"Message: {text}\nSentiment: {label}, Happiness: {happiness_score}%\n\n"

    overall_happiness = round(happiness_sum / total_messages, 2) if total_messages > 0 else 0
    summary = f"Total messages: {total_messages}\nHappiness Score: {overall_happiness}%\n\nDetails:\n" + details
    return summary, total_messages, overall_happiness

# Create the Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# Analyze Audio and Chat Logs")
    
    with gr.Tabs():
        with gr.TabItem("Audio"):
            gr.Markdown("## Audio Analysis")
            audio_input = gr.Audio(sources=["microphone", "upload"], type="filepath", label="Record or Upload Audio")
            transcript_output = gr.Textbox(label="Transcript")
            kpi_output = gr.Textbox(label="Happiness Score")
            audio_btn = gr.Button("Process Audio")
            audio_btn.click(interface_audio, inputs=audio_input, outputs=[transcript_output, kpi_output])
        
        with gr.TabItem("Chat Logs"):
            gr.Markdown("## Chat logs (JSON)")
            gr.Markdown("Upload a JSON file with a chat log. Each item should have a 'message' or 'text' key.")
            json_input = gr.File(label="JSON File")
            log_summary_output = gr.Textbox(label="Summary", lines=10)
            total_messages_output = gr.Number(label="Total Messages")
            overall_happiness_output = gr.Number(label="Happiness Score (%)")
            json_btn = gr.Button("Process Chat Log")
            json_btn.click(process_text_log, inputs=json_input, outputs=[log_summary_output, total_messages_output, overall_happiness_output])
    gr.Markdown("""
                # 🎙️ Welcome to HappyPulse!

                ## 🚀 How It Works

                HappyPulse is a sentiment analysis tool that allows you to record your voice or upload an audio file and analyze the emotions behind your speech. Using AI-powered transcription and sentiment analysis, we provide insights into the emotional tone of your communication.

                ## 🎤 Try It Now!

                ### **Audio Analysis** 🎧
                - Click **Record** and speak naturally, expressing different emotions.
                - Upload an audio file if you prefer.
                - Our AI will transcribe your speech and analyze its sentiment.
                - Get instant feedback on the emotional tone detected!

                ### **Chat Log Analysis** 💬
                - Upload a JSON file containing chat conversations.
                - The system will analyze each message's sentiment.
                - Understand customer emotions and improve interactions.

                ## 📊 Why Use HappyPulse?

                ✅ Understand emotional tones in conversations.
                ✅ Improve customer service by detecting frustration in real time.
                ✅ Gain insights into communication effectiveness.

                Give it a try and see how emotions influence speech and text! 🚀
        """)
    
demo.launch()
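
# A quick way to create a sample chat log for the "Chat Logs" tab (illustrative;
# the file name and messages are arbitrary):
#
#     import json
#     messages = [{"message": "Adorei o produto!"}, {"text": "O suporte demorou muito."}]
#     with open("sample_chat_log.json", "w", encoding="utf-8") as f:
#         json.dump(messages, f, ensure_ascii=False)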