File size: 5,167 Bytes
ed5c736 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 |
import pandas as pd
import re
import tempfile
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from flair.models import TextClassifier
from flair.data import Sentence
from flair.nn import Classifier
# Load the FLAIR "sentiment" model once, at import time, so that every call to
# analyze_sentiment_flair reuses this single classifier instance.
classifier = Classifier.load("sentiment")
# Minimal text cleaning
def clean_text_for_flair(text):
    """Return *text* with URLs and HTML tags removed and whitespace normalised.

    The cleaning is deliberately light: casing and punctuation are left
    untouched; only URLs, tags and redundant whitespace are removed.

    Args:
        text: The raw cell value. Missing values (``None``, ``NaN``, ...)
            yield an empty string. Any other non-string value, such as a
            number in a mixed-type column, is converted with ``str`` first
            instead of making ``re.sub`` raise ``TypeError``.

    Returns:
        The cleaned string, possibly empty.
    """
    if not isinstance(text, str):
        if pd.isnull(text):
            return ""
        text = str(text)
    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
    text = re.sub(r"<.*?>", "", text)  # remove HTML
    text = re.sub(r"\s+", " ", text).strip()  # normalize whitespace
    return text
# Generate word clouds
def _save_wordcloud(text, colormap, title, path):
    """Render *text* as a word cloud saved at *path*; return *path*, or None if there are no words."""
    if not text.strip():
        return None
    try:
        cloud = WordCloud(width=800, height=400, background_color='white', colormap=colormap).generate(text)
    except ValueError:
        # WordCloud refuses to build a cloud when no words are left to plot.
        return None
    fig = plt.figure(figsize=(10, 5))
    try:
        plt.imshow(cloud, interpolation='bilinear')
        plt.axis("off")
        plt.title(title)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even if rendering or saving failed.
        plt.close(fig)
    return path


def generate_wordclouds(df):
    """Create one word cloud image per sentiment class.

    Args:
        df: DataFrame with a ``sentiment`` column (rows labelled
            ``"POSITIVE"`` / ``"NEGATIVE"`` are used) and a ``clean_text``
            column holding the text of each row.

    Returns:
        A ``(positive_path, negative_path)`` tuple of image file paths. An
        entry is ``None`` when the required columns are missing or when that
        sentiment class has no words to draw, so a data set containing only
        one sentiment no longer aborts the whole analysis.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    positive_text = " ".join(df[df["sentiment"] == "POSITIVE"]["clean_text"].astype(str))
    negative_text = " ".join(df[df["sentiment"] == "NEGATIVE"]["clean_text"].astype(str))

    pos_path = _save_wordcloud(positive_text, 'Greens', "Positive Word Cloud", "positive_wordcloud.png")
    neg_path = _save_wordcloud(negative_text, 'Reds', "Negative Word Cloud", "negative_wordcloud.png")
    return pos_path, neg_path
# Main analysis function
def analyze_sentiment_flair(file, text_column):
    """Classify every row of a CSV with the FLAIR sentiment model.

    Args:
        file: The uploaded CSV, given either as an object exposing its path
            as ``.name`` or as a plain path string. ``None`` (nothing
            uploaded) is reported as a load error.
        text_column: Name of the column that holds the text to classify.

    Returns:
        A 6-tuple ``(status message, results CSV path, sentiment plot path,
        confidence plot path, positive word cloud path, negative word cloud
        path)``. When the file cannot be loaded or the column is missing,
        every element except the message is ``None``.
    """
    failure = (None,) * 5

    if file is None:
        return ("Error loading file: no file was uploaded.", *failure)
    try:
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:
        return (f"Error loading file: {e}", *failure)

    if text_column not in df.columns:
        return ("Selected text column not found.", *failure)

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)

    # A blank text gives the classifier nothing to label, and indexing
    # labels[0] on it would raise IndexError, so only non-blank rows are
    # turned into sentences. They are predicted in one batched call.
    sentences = [
        Sentence(text) if isinstance(text, str) and text.strip() else None
        for text in df["clean_text"]
    ]
    to_predict = [sentence for sentence in sentences if sentence is not None]
    if to_predict:
        classifier.predict(to_predict)

    sentiments = []
    scores = []
    for sentence in sentences:
        if sentence is not None and sentence.labels:
            top_label = sentence.labels[0]
            sentiments.append(top_label.value)
            scores.append(top_label.score)
        else:
            # Rows that could not be classified stay in the output, clearly marked.
            sentiments.append("UNKNOWN")
            scores.append(float("nan"))

    df["sentiment"] = sentiments
    df["confidence"] = scores

    # Save results. Writing through the open handle (rather than re-opening
    # tmp.name) also works on platforms that forbid opening the file twice.
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".csv", newline="", encoding="utf-8"
    ) as tmp:
        df.to_csv(tmp, index=False)
        csv_path = tmp.name

    # Sentiment count plot
    plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", palette="pastel")
    plt.title("Sentiment Distribution")
    plt.tight_layout()
    sentiment_plot_path = "sentiment_flair_plot.png"
    plt.savefig(sentiment_plot_path)
    plt.close()

    # Confidence score distribution
    plt.figure(figsize=(6, 4))
    sns.histplot(df["confidence"], bins=30, kde=True, color="lightblue")
    plt.title("Confidence Score Distribution")
    plt.tight_layout()
    confidence_plot_path = "confidence_flair_plot.png"
    plt.savefig(confidence_plot_path)
    plt.close()

    # Word clouds
    pos_wc_path, neg_wc_path = generate_wordclouds(df)

    return (
        f"Sentiment analysis completed on {len(df)} rows.",
        csv_path,
        sentiment_plot_path,
        confidence_plot_path,
        pos_wc_path,
        neg_wc_path,
    )
# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    def get_text_columns(file):
        """Return a dropdown update listing the text (object-dtype) columns of *file*.

        The dropdown is emptied when no file is present or the file cannot be
        read. Its label is restored on every path except the "no text
        columns" warning, so an earlier warning does not stick after a later,
        valid upload.
        """
        default_label = "Select Text Column"
        if file is None:
            # No upload present (for example after the user cleared it).
            return gr.update(choices=[], value=None, label=default_label)
        try:
            df = pd.read_csv(getattr(file, "name", file))
        except Exception:
            # Unreadable upload: leave the dropdown empty rather than crash the UI.
            return gr.update(choices=[], value=None, label=default_label)

        text_cols = df.select_dtypes(include='object').columns.tolist()
        if not text_cols:
            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
        return gr.update(choices=text_cols, value=text_cols[0], label=default_label)

    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

    analyze_btn = gr.Button("Run Sentiment Analysis")
    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    # Output order must match the 6-tuple returned by analyze_sentiment_flair.
    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
    )

app.launch(share=True, debug=True)
|