File size: 5,167 Bytes
ed5c736
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
import pandas as pd
import re
import tempfile
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from flair.models import TextClassifier
from flair.data import Sentence
from flair.nn import Classifier

# Load the Flair model registered under the name "sentiment" once, at import
# time; analyze_sentiment_flair reuses this single module-level instance for
# every request instead of reloading the model per call.
classifier = Classifier.load("sentiment")

# Minimal text cleaning
def clean_text_for_flair(text):
    """Return *text* lightly normalised for sentiment classification.

    URLs (``http...`` / ``www...``) and HTML tags are removed and every run
    of whitespace is collapsed to a single space. Cleaning is deliberately
    minimal: case and punctuation are left untouched.

    Args:
        text: A single cell value. Strings are cleaned directly; other
            non-null values (numbers, booleans, ...) are converted with
            ``str()`` first, because CSV columns can hold mixed types.

    Returns:
        The cleaned string, or ``""`` when the value is missing
        (``None``, ``NaN``, ``NaT``, ``pd.NA``).
    """
    if text is None:
        return ""
    if not isinstance(text, str):
        # pd.isnull returns an array for list-likes, whose truth value is
        # ambiguous, so only ask it about scalars.
        if pd.api.types.is_scalar(text) and pd.isnull(text):
            return ""
        # re.sub only accepts str/bytes; coerce instead of raising TypeError.
        text = str(text)

    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
    text = re.sub(r"<.*?>", "", text)  # remove HTML
    return re.sub(r"\s+", " ", text).strip()  # normalize whitespace

# Generate word clouds
def generate_wordclouds(df):
    """Render one word cloud per sentiment class and return the image paths.

    Args:
        df: DataFrame expected to carry a ``sentiment`` column (rows labelled
            ``"POSITIVE"`` / ``"NEGATIVE"`` are used) and a ``clean_text``
            column holding the text to visualise.

    Returns:
        ``(positive_path, negative_path)``. An entry is ``None`` when the
        required columns are missing or when that class has no words to
        draw, so one empty class no longer aborts the whole run.

    Note:
        Images are written to fixed file names in the current working
        directory, so every call overwrites the previous call's images.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    pos_path = _render_sentiment_wordcloud(
        df, "POSITIVE", "Greens", "Positive Word Cloud", "positive_wordcloud.png"
    )
    neg_path = _render_sentiment_wordcloud(
        df, "NEGATIVE", "Reds", "Negative Word Cloud", "negative_wordcloud.png"
    )
    return pos_path, neg_path


def _render_sentiment_wordcloud(df, label, colormap, title, path):
    """Draw the word cloud for rows whose sentiment equals *label*.

    Returns *path* after saving the image, or ``None`` if there is nothing
    to draw for that label.
    """
    text = " ".join(df.loc[df["sentiment"] == label, "clean_text"].astype(str))
    if not text.strip():
        return None

    try:
        cloud = WordCloud(
            width=800, height=400, background_color='white', colormap=colormap
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no usable word survives its own
        # filtering (e.g. the text contains only stopwords).
        return None

    # Explicit Figure/Axes: nothing depends on pyplot's implicit "current
    # figure", and the figure is closed even if saving fails.
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        ax.imshow(cloud, interpolation='bilinear')
        ax.axis("off")
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)
    return path

# Main analysis function
def analyze_sentiment_flair(file, text_column):
    """Classify every row of an uploaded CSV and build the result artefacts.

    Args:
        file: The uploaded CSV, either an object exposing the path as
            ``.name`` or a plain path string. ``None`` means nothing was
            uploaded.
        text_column: Name of the column whose text is classified.

    Returns:
        A 6-tuple ``(status, csv_path, sentiment_plot_path,
        confidence_plot_path, positive_wordcloud_path,
        negative_wordcloud_path)``. On failure ``status`` describes the
        problem and the other five entries are ``None``. Word cloud entries
        may also be ``None`` when there is nothing to draw.

        The result CSV contains the input columns plus ``clean_text``,
        ``sentiment`` and ``confidence``. Rows whose cleaned text is empty
        get sentiment ``"UNKNOWN"`` and a ``NaN`` confidence.

    Note:
        The two plots are written to fixed file names in the current working
        directory, so every call overwrites the previous call's images.
    """
    no_outputs = (None,) * 5

    if file is None:
        return ("Error loading file: no file was uploaded.", *no_outputs)

    # Accept both an object carrying the path in `.name` and a bare path.
    csv_source = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_source)
    except Exception as e:
        return (f"Error loading file: {e}", *no_outputs)

    if text_column not in df.columns:
        return ("Selected text column not found.", *no_outputs)

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)

    sentiments, scores = _predict_sentiments(df["clean_text"])
    df["sentiment"] = sentiments
    df["confidence"] = scores

    # Save results. Only reserve the name inside the `with`; writing after
    # the handle is closed avoids opening the same file twice.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    df.to_csv(csv_path, index=False)

    sentiment_plot_path = "sentiment_flair_plot.png"
    _save_sentiment_count_plot(df, sentiment_plot_path)

    confidence_plot_path = "confidence_flair_plot.png"
    _save_confidence_plot(df, confidence_plot_path)

    # Word clouds are a nice-to-have: WordCloud raises ValueError when a
    # class has no words, which must not discard the finished analysis.
    try:
        pos_wc_path, neg_wc_path = generate_wordclouds(df)
    except ValueError:
        pos_wc_path, neg_wc_path = None, None

    return (
        f"Sentiment analysis completed on {len(df)} rows.",
        csv_path,
        sentiment_plot_path,
        confidence_plot_path,
        pos_wc_path,
        neg_wc_path,
    )


# Label given to rows the model cannot score (empty text after cleaning).
_NO_PREDICTION_LABEL = "UNKNOWN"
# Rows turned into Sentence objects at a time; bounds memory on large files
# while still letting the model work on batches instead of single rows.
_PREDICT_CHUNK_SIZE = 256


def _predict_sentiments(texts):
    """Return ``(labels, scores)`` for *texts*, in the same order."""
    texts = list(texts)
    labels = []
    scores = []
    for start in range(0, len(texts), _PREDICT_CHUNK_SIZE):
        chunk = texts[start:start + _PREDICT_CHUNK_SIZE]
        sentences = [Sentence(text) if text else None for text in chunk]
        scorable = [s for s in sentences if s is not None and len(s) > 0]
        if scorable:
            # Predictions are attached to the Sentence objects in place.
            classifier.predict(scorable)
        for sentence in sentences:
            predicted = sentence.labels if sentence is not None else []
            if predicted:
                labels.append(predicted[0].value)
                scores.append(predicted[0].score)
            else:
                labels.append(_NO_PREDICTION_LABEL)
                scores.append(float("nan"))
    return labels, scores


def _save_sentiment_count_plot(df, path):
    """Save a bar chart of row counts per sentiment label to *path*."""
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.countplot(data=df, x="sentiment", palette="pastel", ax=ax)
        ax.set_title("Sentiment Distribution")
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)  # release the figure even if plotting fails


def _save_confidence_plot(df, path):
    """Save a histogram (with KDE) of the confidence scores to *path*."""
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue", ax=ax)
        ax.set_title("Confidence Score Distribution")
        fig.tight_layout()
        fig.savefig(path)
    finally:
        plt.close(fig)  # release the figure even if plotting fails

# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    def get_text_columns(file):
        """Fill the column dropdown with the uploaded CSV's text columns.

        Args:
            file: The uploaded CSV (object with ``.name`` or a path string),
                or ``None`` when the upload was cleared.

        Returns:
            A ``gr.update`` for the dropdown. Every branch sets choices,
            value and label together so that a warning label or a stale
            selection from a previous upload never lingers.
        """
        default_label = "Select Text Column"
        if file is None:
            return gr.update(choices=[], value=None, label=default_label)

        try:
            df = pd.read_csv(getattr(file, "name", file))
        except Exception:
            # Best effort: an unreadable file just empties the dropdown, but
            # say so instead of failing silently.
            return gr.update(choices=[], value=None, label="⚠️ Could not read CSV")

        text_cols = df.select_dtypes(include='object').columns.tolist()
        if not text_cols:
            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
        return gr.update(choices=text_cols, value=text_cols[0], label=default_label)

    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

    analyze_btn = gr.Button("Run Sentiment Analysis")

    # Output components, in the same order as the tuple returned by
    # analyze_sentiment_flair.
    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
    )

# Start the server only when this file is run as a script, so importing the
# module (e.g. to reuse its functions) does not block inside launch().
if __name__ == "__main__":
    app.launch(share=True, debug=True)