File size: 3,122 Bytes
7d69146
f9ac6a3
0e6c5ba
 
8081c23
0e6c5ba
 
7d69146
0e6c5ba
 
7d69146
0e6c5ba
 
 
 
8081c23
0e6c5ba
 
8081c23
0e6c5ba
8081c23
0e6c5ba
 
8081c23
 
0e6c5ba
8081c23
0e6c5ba
8081c23
0e6c5ba
8a85e12
0e6c5ba
7d69146
0e6c5ba
 
 
 
 
8081c23
0e6c5ba
 
 
 
 
 
8081c23
0e6c5ba
 
8081c23
0e6c5ba
 
 
8081c23
0e6c5ba
 
 
8081c23
0e6c5ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7d69146
0e6c5ba
fa0c534
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import gradio as gr
import pandas as pd
from openai import OpenAI
import os
import io
import json
from transformers import pipeline

# Local open-source NLP model used for the first level of analysis.
# Built once at import time; analyze_comments() reads this module-level name
# for every comment it scores.
sentiment_pipeline = pipeline("sentiment-analysis", model="nlptown/bert-base-multilingual-uncased-sentiment")

# Classify comments with the local NLP model only (no remote GPT call is made).
def analyze_comments(texts):
    """Label each comment as "Positive", "Negative" or "Neutral".

    Each comment is scored by the module-level ``sentiment_pipeline``. The
    label it returns is expected to start with an integer rating (the code
    parses the first whitespace-separated token as an int); ratings above 3
    map to "Positive", below 3 to "Negative", and exactly 3 to "Neutral".

    Args:
        texts: Iterable of comments. Items that are not strings are converted
            with ``str()``; ``None``, float NaN and blank strings are labelled
            "Neutral" without calling the model.

    Returns:
        A list of labels, one per input item, in input order.

    Raises:
        ValueError: If the pipeline label does not start with an integer.
    """
    results = []

    for text in texts:
        # pandas represents empty cells as float NaN (NaN != NaN); the
        # pipeline only accepts text, so missing values must not reach it.
        is_missing = text is None or (isinstance(text, float) and text != text)
        comment = "" if is_missing else str(text)

        if not comment.strip():
            # Nothing to score: there is no sentiment in an empty comment.
            results.append("Neutral")
            continue

        # truncation=True asks the tokenizer to cut over-long comments
        # instead of failing on inputs longer than the model accepts.
        label = sentiment_pipeline(comment, truncation=True)[0]['label']
        rating = int(label.split()[0])

        if rating > 3:
            results.append("Positive")
        elif rating < 3:
            results.append("Negative")
        else:
            results.append("Neutral")

    return results


def analyze_file(file):
    """Load a CSV/Excel file and label the sentiment of its first column.

    The first column is renamed to "Comment", its values are sent to
    ``analyze_comments`` in batches of 10, and the labels are stored in a new
    "Sentiment" column.

    Args:
        file: Either a filesystem path (``str`` / ``os.PathLike``) or an
            object exposing the path through a ``.name`` attribute.

    Returns:
        ``(DataFrame, BytesIO)`` on success, where the buffer holds the same
        table serialized as an .xlsx workbook and is positioned at offset 0.
        ``(error_message, None)`` on failure; callers detect this case by
        checking whether the first element is a ``str``.
    """
    if file is None:
        return "Error: No file uploaded.", None

    try:
        # Accept a plain path as well as an object carrying the path in .name.
        path = file if isinstance(file, (str, os.PathLike)) else file.name
        file_extension = os.path.splitext(path)[-1].lower()

        # Prefer reading from disk; fall back to the object itself when the
        # name does not resolve to a file (e.g. an in-memory buffer).
        source = path if os.path.exists(path) else file

        if file_extension == ".csv":
            df = pd.read_csv(source)
        elif file_extension in (".xls", ".xlsx"):
            # Let pandas choose the engine: openpyxl cannot read legacy .xls
            # workbooks, so forcing it broke that advertised format.
            df = pd.read_excel(source)
        else:
            return "Error: Unsupported file format.", None

        if df.empty or df.shape[1] < 1:
            return "Error: No valid data in the file.", None

        df = df.rename(columns={df.columns[0]: "Comment"})
        # Select by position: if another column is already named "Comment",
        # df["Comment"] would return a DataFrame instead of a single column.
        comments = df.iloc[:, 0].tolist()

        batch_size = 10
        sentiments = []
        for start in range(0, len(comments), batch_size):
            sentiments.extend(analyze_comments(comments[start:start + batch_size]))

        df["Sentiment"] = sentiments

        output = io.BytesIO()
        with pd.ExcelWriter(output, engine='openpyxl') as writer:
            df.to_excel(writer, index=False)
        output.seek(0)
        return df, output
    except Exception as e:
        # Top-level boundary for the UI: report the failure as a message
        # rather than propagating, matching the (message, None) contract.
        return f"Error processing file: {str(e)}", None


def sentiment_analysis_interface():
    """Build the Gradio interface for uploading a file and viewing its sentiment.

    The interface takes one uploaded file and produces two outputs: a table
    with the analysed rows and a downloadable Excel workbook.

    Returns:
        The configured ``gr.Interface`` (not yet launched).
    """
    # Local import so this block does not depend on a file-level import.
    import tempfile

    file_input = gr.File(label="📥 Upload a file (CSV, XLSX)")
    results_output = gr.Dataframe()
    download_button = gr.File(label="📥 Download Excel")

    def process_file(uploaded_file):
        """Run the analysis and return (table, path to the .xlsx result)."""
        df, excel_data = analyze_file(uploaded_file)
        if isinstance(df, str):
            # analyze_file signals failure with a message string. A string is
            # not valid data for the Dataframe output, so surface it as a
            # Gradio error instead of returning it.
            raise gr.Error(df)

        # Write each result into its own temporary directory: a fixed path in
        # the working directory would be overwritten by concurrent requests.
        # The file keeps a stable, readable name for the download.
        out_dir = tempfile.mkdtemp(prefix="sentiment_")
        out_path = os.path.join(out_dir, "sentiment_analysis.xlsx")
        with open(out_path, "wb") as f:
            f.write(excel_data.getvalue())
        return df, out_path

    return gr.Interface(
        fn=process_file,
        inputs=[file_input],
        outputs=[results_output, download_button],
        title="📊 Sentiment Analysis",
        description="Upload a file with comments and get sentiment analysis using an NLP model!"
    )

# Build the interface at import time so `iface` is available as a
# module-level name to anything that imports this module.
iface = sentiment_analysis_interface()

# Only start the web server when the file is executed as a script.
if __name__ == "__main__":
    # NOTE(review): share=True presumably requests a publicly reachable
    # share link from Gradio — confirm that public exposure is intended.
    iface.launch(share=True)