Spaces:

pvaluedotone
/

VADER_sentiment_analysis

Sleeping

File size: 4,148 Bytes

d0d1bb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b6e5f0c
d0d1bb3
 
b6e5f0c
d0d1bb3
 
 
 
 
 
 
 
 
 
 
 
b6e5f0c
d0d1bb3
 
 
 
 
 
 
 
42148b2
 
d0d1bb3
 
 
 
 
b6e5f0c
 
d0d1bb3
b6e5f0c
d0d1bb3
 
b6e5f0c
 
 
 
 
 
 
 
 
d0d1bb3
42148b2
b6e5f0c
674bf1a
d0d1bb3
 
b6e5f0c
 
d0d1bb3
 
 
 
 
 
 
b6e5f0c
d0d1bb3
b6e5f0c
 
 
 
d0d1bb3
 
b6e5f0c
d0d1bb3
b6e5f0c
 
d0d1bb3
 
b6e5f0c

import os
import re
import tempfile

import gradio as gr
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import seaborn as sns
from nltk.sentiment import SentimentIntensityAnalyzer

# Fetch the "vader_lexicon" NLTK resource at import time, before the analyzer
# below is constructed.
nltk.download("vader_lexicon")
# Single module-level analyzer instance; analyze_sentiment() calls
# sia.polarity_scores() on it for every row.
sia = SentimentIntensityAnalyzer()

def clean_text(text):
    """Normalise a raw text cell before it is scored.

    Non-string input (for example a missing value) yields an empty string.
    For strings, URLs, @mentions, #hashtags and every remaining character
    that is neither a word character nor whitespace are removed; the result
    is lower-cased and stripped of leading/trailing whitespace.
    """
    if not isinstance(text, str):
        return ""

    cleaned = text
    # Order matters: URLs, mentions and hashtags must be removed while their
    # marker characters (":", "/", "@", "#") are still present, i.e. before
    # the final pattern strips punctuation.
    for pattern in (r"http\S+", r"@\w+|#\w+", r"[^\w\s]"):
        cleaned = re.sub(pattern, "", cleaned)

    return cleaned.lower().strip()

def get_sentiment_label(score, pos_thresh, neg_thresh):
    """Map a compound score to "Positive", "Negative" or "Neutral".

    Both thresholds are inclusive. The positive threshold is tested first,
    so it wins when a score satisfies both conditions. Anything that meets
    neither condition is labelled "Neutral".
    """
    if score >= pos_thresh:
        return "Positive"
    if score <= neg_thresh:
        return "Negative"
    return "Neutral"

def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score one CSV text column with VADER and write the result artefacts.

    Args:
        file: The uploaded CSV, given either as a filesystem path or as an
            object exposing the path through a ``.name`` attribute. ``None``
            (nothing uploaded) is reported as an error.
        text_column: Name of the column whose text is scored.
        pos_thresh: Compound score at or above which a row is "Positive".
        neg_thresh: Compound score at or below which a row is "Negative".

    Returns:
        A 5-tuple ``(status, preview, csv_path, label_plot_path,
        compound_plot_path)``. On failure only ``status`` is set and the
        other four items are ``None``. On success ``preview`` is the first
        ten rows of the text column with its score and label, and the three
        paths point at files inside a directory created for this call.
    """
    if file is None:
        return "Error reading CSV file: no file uploaded.", None, None, None, None

    # Accept both a plain path and an object carrying the path in `.name`.
    csv_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        # UI boundary: any read/parse failure is surfaced as a status message
        # rather than an unhandled exception.
        return f"Error reading CSV file: {e}", None, None, None, None

    if text_column not in df.columns:
        return "Selected column not found.", None, None, None, None

    df["clean_text"] = df[text_column].apply(clean_text)
    df["compound"] = df["clean_text"].apply(lambda x: sia.polarity_scores(x)["compound"])
    df["sentiment"] = df["compound"].apply(lambda score: get_sentiment_label(score, pos_thresh, neg_thresh))

    # Each call writes into its own directory so overlapping requests cannot
    # overwrite (or be served) each other's results. The directory is not
    # removed here because the returned paths must outlive this function.
    out_dir = tempfile.mkdtemp(prefix="vader_")

    # Save CSV
    output_file = os.path.join(out_dir, "VADER_sentiment_results.csv")
    df.to_csv(output_file, index=False)

    # Plot 1: Sentiment distribution.
    # Explicit figure/axes objects are used instead of pyplot's implicit
    # "current figure", and the figure is closed even if plotting raises.
    sentiment_fig = os.path.join(out_dir, "sentiment_dist.png")
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.countplot(data=df, x="sentiment", palette="Set2", ax=ax)
        ax.set_title("Sentiment Distribution")
        fig.tight_layout()
        fig.savefig(sentiment_fig)
    finally:
        plt.close(fig)

    # Plot 2: Compound score histogram
    compound_fig = os.path.join(out_dir, "compound_dist.png")
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.histplot(df["compound"], bins=30, kde=True, color="skyblue", ax=ax)
        ax.set_title("Compound score distribution")
        ax.set_xlabel("Compound score")
        fig.tight_layout()
        fig.savefig(compound_fig)
    finally:
        plt.close(fig)

    # Sample preview
    preview = df[[text_column, "compound", "sentiment"]].head(10)

    return f"Sentiment analysis complete. Processed {len(df)} rows.", preview, output_file, sentiment_fig, compound_fig

def get_text_columns(file):
    """Build the dropdown update listing the text columns of an uploaded CSV.

    Args:
        file: The uploaded CSV, given either as a filesystem path or as an
            object exposing the path through a ``.name`` attribute, or
            ``None`` when the upload has been cleared.

    Returns:
        A ``gr.update(...)`` for the column dropdown. On success it carries
        the text column names, preselects the first one and restores the
        normal label. If nothing is uploaded the dropdown is emptied. If the
        file has no text columns or cannot be read, the dropdown is emptied
        and its label is replaced by a warning.
    """
    default_label = "Select Text Column"

    if file is None:
        # Upload cleared: reset the dropdown instead of reporting an error.
        return gr.update(choices=[], value=None, label=default_label)

    try:
        csv_path = getattr(file, "name", file)
        # Sample several rows: with a single row, a text column whose first
        # cell is empty is inferred as float and would be left out.
        sample = pd.read_csv(csv_path, nrows=100)
        # "string" is included alongside "object" so columns stored with a
        # dedicated string dtype are listed as well.
        text_columns = sample.select_dtypes(include=["object", "string"]).columns.tolist()
    except Exception:
        # UI boundary: any read/parse failure becomes a visible warning.
        return gr.update(choices=[], value=None, label="⚠️ Error reading file")

    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")

    # Restore the label explicitly; otherwise a warning set by an earlier
    # failed upload would remain on the dropdown.
    return gr.update(choices=text_columns, value=text_columns[0], label=default_label)

# Gradio UI definition. Components are created in the order they should
# appear, so statement order inside this block is significant.
with gr.Blocks() as app:
    gr.Markdown("## Sentiment analysis with VADER")
    gr.Markdown("Upload a CSV, choose a text column, adjust sentiment thresholds, and run analysis.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with VADER on Gradio* (version 1.0) [software]. https://huggingface.co/spaces/pvaluedotone/VADER_sentiment_analysis")

    # Inputs: the CSV upload and the dropdown that will list its text columns.
    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    # Whenever the uploaded file changes, refresh the dropdown via get_text_columns().
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)

    # Thresholds passed to analyze_sentiment() as pos_thresh / neg_thresh.
    # The positive slider spans 0.0..1.0 and the negative slider -1.0..0.0.
    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.05, step=0.01, label="Negative Threshold")

    analyze_button = gr.Button("Run Sentiment Analysis")

    # Outputs, in the same order as the 5-tuple returned by analyze_sentiment().
    status_box = gr.Textbox(label="Status")
    data_output = gr.Dataframe(label="Sample Output (Top 10)")
    file_output = gr.File(label="Download Full Results")
    sentiment_plot = gr.Image(label="Sentiment Label Distribution")
    compound_plot = gr.Image(label="Compound Score Distribution")

    # Run the analysis on click; inputs/outputs map positionally onto the
    # parameters and return values of analyze_sentiment().
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[status_box, data_output, file_output, sentiment_plot, compound_plot]
    )

# Start the app at import/run time with both debug and share switched on.
app.launch(debug=True, share=True)