File size: 5,965 Bytes
8ba6acf
 
 
 
 
 
 
 
 
 
 
 
 
5d5fca8
 
8ba6acf
 
 
 
 
 
 
 
 
 
 
 
4a5da49
 
 
8ba6acf
4a5da49
8ba6acf
 
 
 
fd0f5ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ba6acf
4a5da49
bc03c5a
 
 
 
8ba6acf
 
bc03c5a
8ba6acf
 
 
 
4a5da49
8ba6acf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d5fca8
 
 
 
 
4a5da49
 
 
8ba6acf
5d5fca8
 
 
 
 
 
 
 
 
 
 
8ba6acf
 
 
 
 
 
 
 
 
5d5fca8
 
 
 
 
 
 
 
 
 
 
 
8ba6acf
 
4a5da49
 
 
8ba6acf
 
 
 
 
 
 
4a5da49
 
 
 
ab91c58
8ba6acf
bc03c5a
ab91c58
4a5da49
 
5d5fca8
 
 
 
8ba6acf
 
 
4a5da49
 
5d5fca8
 
 
 
 
 
 
 
 
 
8ba6acf
 
5d5fca8
4a5da49
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Fetch the NLTK 'punkt' resource when the script starts
# (presumably required by TextBlob's tokenizer - confirm).
import nltk
nltk.download('punkt')

# Download every TextBlob corpus at start-up as well, so the analysis
# below does not depend on data having been installed beforehand.
import textblob.download_corpora
textblob.download_corpora.download_all()

import pandas as pd
import re
from textblob import TextBlob
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import tempfile
from wordcloud import WordCloud


# Text cleaning function
def clean_text(text):
    """Normalise one raw text value before sentiment scoring.

    Steps, in order: convert to ``str``, drop URLs, drop ``@mentions`` and
    ``#`` signs (the hashtag word itself is kept), remove every character
    that is not an ASCII letter, digit or whitespace, lower-case, and trim
    surrounding whitespace.

    Args:
        text: Any scalar cell value; missing values (``None``/``NaN``) are
            accepted.

    Returns:
        The cleaned string, or ``""`` when *text* is missing.
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    # "http\S+" already covers "https...", so two alternatives are enough.
    text = re.sub(r"http\S+|www\S+", '', text)
    # "@\w+" removes the whole handle. The previous pattern "\@w+" only
    # matched a literal "@w", so user names leaked into the cleaned text.
    text = re.sub(r"@\w+|#", '', text)
    text = re.sub(r"[^A-Za-z0-9\s]+", '', text)
    text = text.lower()
    return text.strip()

# Sentiment classification using thresholds
def get_sentiment_label(polarity, pos_thresh, neg_thresh):
    """Map a polarity score onto "Positive", "Negative" or "Neutral".

    The positive threshold is tested first, so a score that satisfies both
    thresholds is reported as "Positive". Both comparisons are inclusive.
    """
    if polarity >= pos_thresh:
        return "Positive"
    return "Negative" if polarity <= neg_thresh else "Neutral"

# Generate word cloud
def generate_wordcloud(text_series, title):
    """Return a 6x4 matplotlib figure showing a word cloud for *text_series*.

    Missing entries are dropped and the rest joined with spaces. If the
    joined text is blank, the figure shows a "No data for <title>" message
    instead of a cloud.

    NOTE: a second module-level function with this same name is defined
    further down the file; being defined later, that one is what callers
    get at runtime.
    """
    joined = " ".join(text_series.dropna())

    # Build the cloud before opening a figure, so a failure inside WordCloud
    # does not leave an empty figure behind.
    cloud = None
    if joined.strip():
        cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(joined)

    figure = plt.figure(figsize=(6, 4))
    if cloud is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(cloud, interpolation="bilinear")

    # Shared finishing touches for both the placeholder and the real cloud.
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return figure


# Main processing function
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score one CSV column with TextBlob and build all report outputs.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.
        text_column: Name of the column holding the text to analyse.
        pos_thresh: Polarity at or above which a row is labelled "Positive".
        neg_thresh: Polarity at or below which a row is labelled "Negative".

    Returns:
        An 8-tuple ``(status, preview, label_plot, polarity_plot, csv_path,
        positive_cloud, negative_cloud, neutral_cloud)``. On failure only
        ``status`` is set and the remaining seven entries are ``None``.

    Side effects:
        Writes the full scored table to ``TextBlob_sentiment_results.csv``
        in the current working directory.
    """
    def failure(message):
        # The button is wired to eight outputs, so error returns must also
        # carry eight values (the earlier 5-tuples did not match).
        return (message,) + (None,) * 7

    try:
        df = pd.read_csv(file)
    except Exception as e:
        return failure(f"❌ Error reading CSV file: {e}")

    if text_column not in df.columns:
        return failure("⚠️ Selected column not found in the uploaded file.")

    df["clean_text"] = df[text_column].apply(clean_text)

    # Run TextBlob once per row and reuse the result for both scores.
    sentiments = [TextBlob(text).sentiment for text in df["clean_text"]]
    df["polarity"] = pd.Series(
        [s.polarity for s in sentiments], index=df.index, dtype=float
    )
    df["subjectivity"] = pd.Series(
        [s.subjectivity for s in sentiments], index=df.index, dtype=float
    )
    df["sentiment"] = df["polarity"].apply(
        lambda p: get_sentiment_label(p, pos_thresh, neg_thresh)
    )

    # Plot sentiment distribution (explicit axes instead of pyplot's
    # implicit "current figure" state).
    fig1, ax1 = plt.subplots(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False, ax=ax1)
    ax1.set_title("Sentiment Label Distribution")
    fig1.tight_layout()

    # Plot polarity distribution
    fig2, ax2 = plt.subplots(figsize=(6, 4))
    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue", ax=ax2)
    ax2.set_title("Polarity Score Distribution")
    fig2.tight_layout()

    # Preview table
    preview_df = df[[text_column, "clean_text", "polarity", "subjectivity", "sentiment"]].head(10)

    # Word clouds per sentiment
    pos_wc = generate_wordcloud(df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud")
    neg_wc = generate_wordcloud(df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud")
    neu_wc = generate_wordcloud(df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud")

    # Save full results
    output_file_path = "TextBlob_sentiment_results.csv"
    df.to_csv(output_file_path, index=False)

    return (
        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
        preview_df,
        fig1,
        fig2,
        output_file_path,
        pos_wc,
        neg_wc,
        neu_wc,
    )


# Dropdown update function
def get_text_columns(file):
    """Build the dropdown update listing the text columns of an uploaded CSV.

    Args:
        file: Value of the file input; ``None`` when no file is selected,
            otherwise something ``pandas.read_csv`` can open.

    Returns:
        A ``gr.update(...)`` for the column dropdown: the object-dtype
        columns with the first one pre-selected, or an empty choice list
        (with a warning label where applicable) when there is nothing to
        offer.
    """
    default_label = "Select Text Column"

    # The change event also fires when the upload is cleared; there is
    # nothing to read then, so just reset the dropdown.
    if file is None:
        return gr.update(choices=[], value=None, label=default_label)

    try:
        df = pd.read_csv(file)
    except (ValueError, OSError) as e:
        # pandas parser, empty-data and decoding errors derive from
        # ValueError; missing or unreadable files raise OSError.
        return gr.update(choices=[], value=None, label=f"⚠️ Could not read CSV: {e}")

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")
    # Restore the normal label in case an earlier upload left a warning.
    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)

# Word cloud function 
def generate_wordcloud(text_series, title):
    """Return a 6x4 matplotlib figure with a word cloud for *text_series*.

    This definition is the last one bound to the name at module level, so
    it is the one used at runtime. It therefore has to cope with a sentiment
    class that has no rows or no usable words.

    Args:
        text_series: pandas Series of texts; missing entries are ignored.
        title: Title drawn above the plot and used in the placeholder text.

    Returns:
        A matplotlib figure showing the word cloud, or a
        "No data for <title>" message when no cloud can be built.
    """
    text = " ".join(text_series.dropna().astype(str))

    cloud = None
    if text.strip():
        try:
            cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no word is left to draw
            # (for example when the text contains only stop words).
            cloud = None

    fig = plt.figure(figsize=(6, 4))
    if cloud is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig


# Gradio Interface
# Components are created in display order inside the Blocks context, so the
# statement order below is the page layout.
with gr.Blocks() as app:
    # Page header: title, short instructions and citation.
    gr.Markdown("## 📝 Sentiment Analysis with TextBlob")
    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with TextBlob on Gradio* (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

    # Row 1: CSV upload next to the column selector (empty until a file is read).
    with gr.Row():
        file_input = gr.File(label="Upload CSV File")
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

    # Refresh the dropdown's choices whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    # Row 2: polarity cut-offs passed to analyze_sentiment as
    # pos_thresh / neg_thresh.
    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")

    # Output widgets, filled by analyze_sentiment.
    status_box = gr.Textbox(label="Status", interactive=False)
    data_output = gr.Dataframe(label="Sample results")
    plot1 = gr.Plot(label="Sentiment Label Distribution")
    plot2 = gr.Plot(label="Polarity Distribution")
    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")

    csv_download = gr.File(label="Download Full Results")

    # The outputs list has eight entries; its order must match the order of
    # the values returned by analyze_sentiment.
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[
            status_box,
            data_output,
            plot1,
            plot2,
            csv_download,
            pos_wordcloud,
            neg_wordcloud,
            neu_wordcloud
        ]
    )


# Start the app as soon as the script runs, with the share and debug options
# switched on (see Gradio's launch() documentation for their exact effect).
app.launch(share=True, debug=True)