File size: 4,148 Bytes
d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 42148b2 d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 42148b2 b6e5f0c 674bf1a d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c d0d1bb3 b6e5f0c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
import pandas as pd
import re
import nltk
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from nltk.sentiment import SentimentIntensityAnalyzer
# Fetch the "vader_lexicon" resource through NLTK's downloader; this runs
# every time the module is imported.
nltk.download("vader_lexicon")
# Module-level analyzer instance; analyze_sentiment() calls
# sia.polarity_scores() on every cleaned row.
sia = SentimentIntensityAnalyzer()
def clean_text(text):
    """Normalise a raw text cell before it is scored.

    Non-string input (for example a missing value) yields an empty string.
    For strings, the following are removed in order: anything starting with
    "http" up to the next whitespace, @mentions and #hashtags, and every
    character that is neither a word character nor whitespace.  The result
    is lower-cased and stripped of leading/trailing whitespace.
    """
    if not isinstance(text, str):
        return ""

    # Order matters: URLs and @/# tokens must go before the punctuation
    # pass, which would otherwise strip the characters that identify them.
    removal_patterns = (r"http\S+", r"@\w+|#\w+", r"[^\w\s]")
    cleaned = text
    for pattern in removal_patterns:
        cleaned = re.sub(pattern, "", cleaned)
    return cleaned.lower().strip()
def get_sentiment_label(score, pos_thresh, neg_thresh):
    """Map a compound score to "Positive", "Negative" or "Neutral".

    The positive threshold is checked first, so a score that satisfies both
    bounds is labelled "Positive".  Both comparisons are inclusive.
    """
    if score >= pos_thresh:
        return "Positive"
    return "Negative" if score <= neg_thresh else "Neutral"
def analyze_sentiment(file, text_column, pos_thresh, neg_thresh):
    """Score every row of an uploaded CSV with VADER and build the outputs.

    Args:
        file: The uploaded file.  Either an object exposing the path as
            ``.name`` or the path itself; ``None`` when nothing is uploaded.
        text_column: Name of the CSV column whose text is scored.
        pos_thresh: Compound scores at or above this are "Positive".
        neg_thresh: Compound scores at or below this are "Negative".

    Returns:
        A 5-tuple ``(status, preview, csv_path, sentiment_png, compound_png)``.
        ``preview`` is the first 10 rows of the text, compound and sentiment
        columns.  When the input cannot be processed, ``status`` carries the
        reason and the other four items are ``None``.
    """
    # Local imports keep this block self-contained; both are standard library.
    import os
    import tempfile

    if file is None:
        return "Error reading CSV file: no file uploaded.", None, None, None, None

    csv_path = getattr(file, "name", file)
    try:
        df = pd.read_csv(csv_path)
    except Exception as e:
        return f"Error reading CSV file: {e}", None, None, None, None

    if text_column not in df.columns:
        return "Selected column not found.", None, None, None, None
    if df.empty:
        return "The CSV file contains no rows to analyse.", None, None, None, None

    df["clean_text"] = df[text_column].apply(clean_text)
    df["compound"] = df["clean_text"].apply(
        lambda x: sia.polarity_scores(x)["compound"]
    )
    df["sentiment"] = df["compound"].apply(
        lambda score: get_sentiment_label(score, pos_thresh, neg_thresh)
    )

    # Every request writes into its own directory.  Fixed names in the
    # working directory would let concurrent requests overwrite, and then
    # serve, each other's results.
    # NOTE(review): these directories are never removed here; add periodic
    # cleanup if the app runs for a long time.
    out_dir = tempfile.mkdtemp(prefix="vader_")

    # Save CSV
    output_file = os.path.join(out_dir, "VADER_sentiment_results.csv")
    df.to_csv(output_file, index=False)

    # Plot 1: Sentiment distribution.  An explicit figure/axes pair avoids
    # relying on pyplot's implicit "current figure", and try/finally closes
    # the figure even if drawing or saving fails.
    sentiment_fig = os.path.join(out_dir, "sentiment_dist.png")
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.countplot(
            data=df,
            x="sentiment",
            palette="Set2",
            # Fixed order keeps bar positions and colours stable across files.
            order=["Negative", "Neutral", "Positive"],
            ax=ax,
        )
        ax.set_title("Sentiment Distribution")
        fig.tight_layout()
        fig.savefig(sentiment_fig)
    finally:
        plt.close(fig)

    # Plot 2: Compound score histogram
    compound_fig = os.path.join(out_dir, "compound_dist.png")
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        sns.histplot(df["compound"], bins=30, kde=True, color="skyblue", ax=ax)
        ax.set_title("Compound score distribution")
        ax.set_xlabel("Compound score")
        fig.tight_layout()
        fig.savefig(compound_fig)
    finally:
        plt.close(fig)

    # Sample preview
    preview = df[[text_column, "compound", "sentiment"]].head(10)
    status = f"Sentiment analysis complete. Processed {len(df)} rows."
    return status, preview, output_file, sentiment_fig, compound_fig
def get_text_columns(file):
    """Build the dropdown update listing the text columns of an uploaded CSV.

    Args:
        file: The uploaded file.  Either an object exposing the path as
            ``.name`` or the path itself; ``None`` when the upload is cleared.

    Returns:
        A ``gr.update(...)`` for the column dropdown.  On success it lists the
        object-dtype columns, selects the first one and restores the normal
        label.  If the file cannot be read or has no such columns, the
        choices are emptied and the label carries a warning.
    """
    default_label = "Select Text Column"
    # Reading a single row makes dtype inference fragile: a text column whose
    # first cell is blank is read as float and would be left out.
    sample_rows = 100

    if file is None:
        # The upload was cleared; reset the dropdown without showing an error.
        return gr.update(choices=[], value=None, label=default_label)

    try:
        df = pd.read_csv(getattr(file, "name", file), nrows=sample_rows)
    except Exception:
        return gr.update(choices=[], value=None, label="⚠️ Error reading file")

    text_columns = df.select_dtypes(include='object').columns.tolist()
    if not text_columns:
        return gr.update(choices=[], value=None, label="⚠️ No text columns found!")

    # Set the label explicitly so a warning from an earlier upload does not
    # stay on screen after a good file is loaded.
    return gr.update(
        choices=text_columns, value=text_columns[0], label=default_label
    )
# UI definition: components are created in the order they should appear.
# NOTE(review): the nesting below was reconstructed conventionally because
# the original indentation was not available. In particular, confirm whether
# the button and the output components belong inside the second Row.
with gr.Blocks() as app:
    # Header, usage hint and citation.
    gr.Markdown("## Sentiment analysis with VADER")
    gr.Markdown("Upload a CSV, choose a text column, adjust sentiment thresholds, and run analysis.")
    gr.Markdown("**Citation:** Mat Roni, S. (2025). *Sentiment analysis with VADER on Gradio* (version 1.0) [software]. https://huggingface.co/spaces/pvaluedotone/VADER_sentiment_analysis")
    # CSV upload next to the column picker.
    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        column_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)
    # Refresh the dropdown choices whenever the uploaded file changes.
    file_input.change(get_text_columns, inputs=file_input, outputs=column_dropdown)
    # Thresholds passed to analyze_sentiment as pos_thresh / neg_thresh.
    with gr.Row():
        pos_thresh_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01, label="Positive Threshold")
        neg_thresh_slider = gr.Slider(minimum=-1.0, maximum=0.0, value=-0.05, step=0.01, label="Negative Threshold")
    analyze_button = gr.Button("Run Sentiment Analysis")
    # Output components, in the same order as analyze_sentiment's return tuple.
    status_box = gr.Textbox(label="Status")
    data_output = gr.Dataframe(label="Sample Output (Top 10)")
    file_output = gr.File(label="Download Full Results")
    sentiment_plot = gr.Image(label="Sentiment Label Distribution")
    compound_plot = gr.Image(label="Compound Score Distribution")
    # Run the analysis on click and send its five results to the outputs.
    analyze_button.click(
        fn=analyze_sentiment,
        inputs=[file_input, column_dropdown, pos_thresh_slider, neg_thresh_slider],
        outputs=[status_box, data_output, file_output, sentiment_plot, compound_plot]
    )
# Start the app when the module is executed; debug and share are both enabled.
app.launch(debug=True, share=True)
|