Spaces:

pvaluedotone
/

textblob-sentiment-app

Sleeping

File size: 5,965 Bytes

8ba6acf
 
 
 
 
 
 
 
 
 
 
 
 
5d5fca8
 
8ba6acf
 
 
 
 
 
 
 
 
 
 
 
4a5da49
 
 
8ba6acf
4a5da49
8ba6acf
 
 
 
fd0f5ba
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8ba6acf
4a5da49
bc03c5a
 
 
 
8ba6acf
 
bc03c5a
8ba6acf
 
 
 
4a5da49
8ba6acf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5d5fca8
 
 
 
 
4a5da49
 
 
8ba6acf
5d5fca8
 
 
 
 
 
 
 
 
 
 
8ba6acf
 
 
 
 
 
 
 
 
5d5fca8
 
 
 
 
 
 
 
 
 
 
 
8ba6acf
 
4a5da49
 
 
8ba6acf
 
 
 
 
 
 
4a5da49
 
 
 
ab91c58
8ba6acf
bc03c5a
ab91c58
4a5da49
 
5d5fca8
 
 
 
8ba6acf
 
 
4a5da49
 
5d5fca8
 
 
 
 
 
 
 
 
 
8ba6acf
 
5d5fca8
4a5da49

# These downloads run at import time, before anything else in the module.
# Fetch the NLTK 'punkt' resource.
import nltk
nltk.download('punkt')

# Fetch the TextBlob corpora as well
# (presumably so TextBlob works on a fresh host - TODO confirm which are needed).
import textblob.download_corpora
textblob.download_corpora.download_all()

import pandas as pd
import re
from textblob import TextBlob
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
import tempfile
from wordcloud import WordCloud


# Text cleaning function
def clean_text(text):
    """Normalize one raw text value before sentiment scoring.

    Steps, in order: remove URLs, remove ``@mentions`` and ``#`` characters
    (the hashtag word itself is kept), drop every character that is not an
    ASCII letter, digit, or whitespace, then lower-case and strip the result.

    Args:
        text: Any scalar value; it is converted with ``str()``. Values that
            ``pd.isnull`` reports as missing yield an empty string.

    Returns:
        The cleaned, lower-cased string (possibly empty).
    """
    if pd.isnull(text):
        return ""
    text = str(text)
    text = re.sub(r"http\S+|www\S+|https\S+", '', text, flags=re.MULTILINE)
    # "@\w+" removes a whole mention. The earlier pattern "\@w+" only matched
    # "@" followed by literal "w" characters, so user handles leaked into the
    # cleaned text.
    text = re.sub(r"@\w+|#", '', text)
    text = re.sub(r"[^A-Za-z0-9\s]+", '', text)
    return text.lower().strip()

# Sentiment classification using thresholds
def get_sentiment_label(polarity, pos_thresh, neg_thresh):
    """Map a polarity score to a sentiment label.

    The positive threshold is checked first, so it wins if both thresholds
    would match.

    Args:
        polarity: Polarity score to classify.
        pos_thresh: Scores greater than or equal to this are "Positive".
        neg_thresh: Scores less than or equal to this are "Negative".

    Returns:
        "Positive", "Negative", or "Neutral".
    """
    if polarity >= pos_thresh:
        return "Positive"
    return "Negative" if polarity <= neg_thresh else "Neutral"

# Generate word cloud
def generate_wordcloud(text_series, title):
    """Draw a word cloud for a series of texts, or a placeholder if it is empty.

    NOTE: another function with this same name is defined further down in
    this file; that later definition rebinds the name, so it is the one
    callers get at runtime.

    Args:
        text_series: Series of strings; missing entries are dropped before
            the remaining texts are joined with single spaces.
        title: Title shown above the figure (also used in the placeholder
            message).

    Returns:
        A 6x4 inch matplotlib figure with the axes hidden.
    """
    combined = " ".join(text_series.dropna())

    # Build the cloud (if there is anything to draw) before opening a figure.
    cloud = None
    if combined.strip():
        cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(combined)

    fig = plt.figure(figsize=(6, 4))
    if cloud is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig


# Main processing function
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score every row of a CSV with TextBlob and build the UI outputs.

    Args:
        file: Path or file-like object accepted by ``pd.read_csv``.
        text_column: Name of the column holding the text to analyze.
        pos_thresh: Polarity at or above which a row is labelled "Positive".
        neg_thresh: Polarity at or below which a row is labelled "Negative".

    Returns:
        An 8-tuple, in this order: status message, 10-row preview DataFrame,
        sentiment count plot, polarity histogram, path of the full-results
        CSV, and the positive, negative and neutral word-cloud figures.
        When the CSV cannot be read or the column is missing, only the
        status message is set and the other seven items are ``None``.
    """
    # The click handler is wired to eight output components, so the failure
    # paths must return eight values as well (they used to return five).
    no_outputs = (None,) * 7

    try:
        df = pd.read_csv(file)
    except Exception as e:
        return (f"❌ Error reading CSV file: {e}", *no_outputs)

    if text_column not in df.columns:
        return ("⚠️ Selected column not found in the uploaded file.", *no_outputs)

    df["clean_text"] = df[text_column].apply(clean_text)
    # Run TextBlob once per row and reuse the result for both scores.
    sentiments = [TextBlob(text).sentiment for text in df["clean_text"]]
    df["polarity"] = [s.polarity for s in sentiments]
    df["subjectivity"] = [s.subjectivity for s in sentiments]
    df["sentiment"] = df["polarity"].apply(lambda p: get_sentiment_label(p, pos_thresh, neg_thresh))

    # Plot sentiment distribution
    fig1 = plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", hue="sentiment", palette="Set2", legend=False)
    plt.title("Sentiment Label Distribution")
    plt.tight_layout()

    # Plot polarity distribution
    fig2 = plt.figure(figsize=(6, 4))
    sns.histplot(df["polarity"], bins=30, kde=True, color="skyblue")
    plt.title("Polarity Score Distribution")
    plt.tight_layout()

    # Preview table
    preview_df = df[[text_column, "clean_text", "polarity", "subjectivity", "sentiment"]].head(10)

    # Word Clouds per sentiment
    pos_wc = generate_wordcloud(df[df["sentiment"] == "Positive"]["clean_text"], "Positive Word Cloud")
    neg_wc = generate_wordcloud(df[df["sentiment"] == "Negative"]["clean_text"], "Negative Word Cloud")
    neu_wc = generate_wordcloud(df[df["sentiment"] == "Neutral"]["clean_text"], "Neutral Word Cloud")

    # Save full results
    # NOTE(review): this is a fixed file name relative to the working
    # directory, so every run overwrites the previous run's results.
    output_file_path = "TextBlob_sentiment_results.csv"
    df.to_csv(output_file_path, index=False)

    return (
        f"✅ Sentiment analysis complete. Processed {len(df)} rows.",
        preview_df,
        fig1,
        fig2,
        output_file_path,
        pos_wc,
        neg_wc,
        neu_wc
    )


# Dropdown update function
def get_text_columns(file):
    """Build the dropdown update listing the text columns of an uploaded CSV.

    Args:
        file: Value of the file-upload component. ``None`` is treated as
            "no file" (e.g. the upload was cleared) and empties the dropdown.

    Returns:
        A ``gr.update`` for the column dropdown. On success the choices are
        the object-dtype columns, with the first one pre-selected. If the
        file cannot be read or has no such columns, the choices are emptied
        and the label carries a warning.
    """
    default_label = "Select Text Column"

    if file is None:
        return gr.update(choices=[], value=None, label=default_label)

    try:
        df = pd.read_csv(file)
    except (OSError, ValueError) as e:
        # pandas' parser, empty-data and decoding errors all derive from
        # ValueError; report them in the label instead of raising.
        return gr.update(choices=[], value=None, label=f"⚠️ Could not read CSV: {e}")

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")
    # Set the label explicitly so a warning left by an earlier upload is cleared.
    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)

# Word cloud function 
def generate_wordcloud(text_series, title):
    """Draw a word cloud for a series of texts, or a placeholder if none can be drawn.

    This is the last definition of ``generate_wordcloud`` in the module, so
    it is the one bound to the name at runtime. It therefore has to handle
    empty input itself, which happens whenever a sentiment class has no rows.

    Args:
        text_series: Series of strings; missing entries are dropped before
            the remaining texts are joined with single spaces.
        title: Title shown above the figure (also used in the placeholder
            message).

    Returns:
        A 6x4 inch matplotlib figure with the axes hidden, showing either
        the word cloud or a "No data for <title>" message.
    """
    text = " ".join(text_series.dropna())

    cloud = None
    if text.strip():
        try:
            cloud = WordCloud(width=600, height=400, background_color="white", colormap="tab10").generate(text)
        except ValueError:
            # WordCloud raises ValueError when no drawable word remains
            # (for example, the text consists only of stopwords).
            cloud = None

    fig = plt.figure(figsize=(6, 4))
    if cloud is None:
        plt.text(0.5, 0.5, f"No data for {title}", fontsize=14, ha='center', va='center')
    else:
        plt.imshow(cloud, interpolation="bilinear")
    plt.axis("off")
    plt.title(title)
    plt.tight_layout()
    return fig


# Gradio Interface
# Build the UI: header text, upload + column picker, threshold sliders,
# a run button, and the result components. Components are declared in the
# order they should appear.
with gr.Blocks() as app:
    gr.Markdown("## 📝 Sentiment Analysis with TextBlob")
    gr.Markdown("Upload a CSV file, select a text column, and set thresholds for sentiment classification.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with TextBlob on Gradio* (version 1.1) [software]. https://huggingface.co/spaces/pvaluedotone/textblob-sentiment-app")

    # Row 1: CSV upload next to the dropdown that will list its columns.
    with gr.Row():
        file_input = gr.File(label="Upload CSV File")
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[])

    # Refresh the dropdown whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    # Row 2: classification thresholds. Positive ranges over [0, 1] and
    # negative over [-1, 0]; the defaults are +0.1 and -0.1.
    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.1, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.1, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")

    # Result components, filled by analyze_sentiment.
    status_box = gr.Textbox(label="Status", interactive=False)
    data_output = gr.Dataframe(label="Sample results")
    plot1 = gr.Plot(label="Sentiment Label Distribution")
    plot2 = gr.Plot(label="Polarity Distribution")
    pos_wordcloud = gr.Plot(label="Positive Word Cloud")
    neg_wordcloud = gr.Plot(label="Negative Word Cloud")
    neu_wordcloud = gr.Plot(label="Neutral Word Cloud")

    csv_download = gr.File(label="Download Full Results")

    # The order of `outputs` follows the order of the tuple that
    # analyze_sentiment returns on success (status, preview, two plots,
    # CSV path, three word clouds).
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[
            status_box,
            data_output,
            plot1,
            plot2,
            csv_download,
            pos_wordcloud,
            neg_wordcloud,
            neu_wordcloud
        ]
    )


# Start the app as soon as the module runs.
# NOTE(review): share=True and debug=True are Gradio launch options - confirm
# both are wanted in the hosted deployment.
app.launch(share=True, debug=True)