Spaces:

pvaluedotone
/

flair_sentiment_analysis

Sleeping

File size: 5,167 Bytes

ed5c736

import pandas as pd
import re
import tempfile
import gradio as gr
import matplotlib.pyplot as plt
import seaborn as sns
from wordcloud import WordCloud
from flair.models import TextClassifier
from flair.data import Sentence
from flair.nn import Classifier

# Load the FLAIR "sentiment" model once, at import time. The resulting
# module-level `classifier` is the instance used by analyze_sentiment_flair()
# for every prediction.
classifier = Classifier.load("sentiment")

# Minimal text cleaning
def clean_text_for_flair(text):
    """Lightly normalise one raw cell value before it is classified.

    Args:
        text: A single value from the selected text column. Missing values
            (``None``/``NaN``) and non-string scalars are accepted.

    Returns:
        The text with URLs and ``<...>`` tags removed and every run of
        whitespace collapsed to a single space. Missing values yield ``""``.
    """
    if not isinstance(text, str):
        if pd.isnull(text):
            return ""
        # Object columns may contain numbers or other scalars; ``re.sub``
        # only accepts strings, so coerce instead of raising ``TypeError``.
        text = str(text)
    text = re.sub(r"http\S+|www\S+", "", text)  # remove URLs
    text = re.sub(r"<.*?>", "", text)  # remove HTML tags
    return re.sub(r"\s+", " ", text).strip()  # normalize whitespace

# Generate word clouds
def generate_wordclouds(df):
    """Render one word cloud image per sentiment class.

    Args:
        df: DataFrame expected to carry a ``sentiment`` column (labels such
            as ``"POSITIVE"``/``"NEGATIVE"``) and a ``clean_text`` column.

    Returns:
        A ``(positive_path, negative_path)`` tuple of PNG file paths. An
        entry is ``None`` when the required columns are missing or when the
        corresponding class has no text to draw.
    """
    if "sentiment" not in df.columns or "clean_text" not in df.columns:
        return None, None

    def joined_text(label):
        # All cleaned texts of one class, joined into a single string.
        rows = df.loc[df["sentiment"] == label, "clean_text"]
        return " ".join(rows.astype(str))

    pos_path = _render_wordcloud(
        joined_text("POSITIVE"), "Greens", "Positive Word Cloud", "positive_wordcloud.png"
    )
    neg_path = _render_wordcloud(
        joined_text("NEGATIVE"), "Reds", "Negative Word Cloud", "negative_wordcloud.png"
    )
    return pos_path, neg_path


def _render_wordcloud(text, colormap, title, path):
    """Draw *text* as a word cloud saved to *path*; return *path* or ``None``.

    ``None`` is returned (and nothing is written) when *text* contains no
    words that can be plotted.
    """
    if not text.strip():
        # A class with no rows would otherwise make WordCloud raise.
        return None
    try:
        cloud = WordCloud(
            width=800, height=400, background_color='white', colormap=colormap
        ).generate(text)
    except ValueError:
        # WordCloud raises ValueError when no drawable word remains
        # (for example, text made up only of stop words).
        return None

    plt.figure(figsize=(10, 5))
    try:
        plt.imshow(cloud, interpolation='bilinear')
        plt.axis("off")
        plt.title(title)
        plt.tight_layout()
        plt.savefig(path)
    finally:
        # Always release the figure, even if rendering or saving fails.
        plt.close()
    return path

# Main analysis function
def analyze_sentiment_flair(file, text_column):
    """Classify every row of an uploaded CSV and build the result artefacts.

    Args:
        file: The uploaded CSV, either an object exposing the path as
            ``.name`` (as the Gradio file input provides) or a plain path
            string.
        text_column: Name of the column holding the text to classify.

    Returns:
        A 6-tuple ``(status, csv_path, sentiment_plot_path,
        confidence_plot_path, positive_wordcloud_path,
        negative_wordcloud_path)``. On failure ``status`` describes the
        problem and the five paths are ``None``.
    """
    no_outputs = (None,) * 5

    try:
        # Accept both upload objects (path in ``.name``) and bare paths.
        df = pd.read_csv(getattr(file, "name", file))
    except Exception as e:
        return (f"Error loading file: {e}", *no_outputs)

    if text_column not in df.columns:
        return ("Selected text column not found.", *no_outputs)

    df["clean_text"] = df[text_column].apply(clean_text_for_flair)

    # Nothing to classify (no rows, or only blank text): stop before the
    # plotting code, which has no data to draw.
    if not df["clean_text"].astype(bool).any():
        return ("No non-empty text found in the selected column.", *no_outputs)

    df["sentiment"], df["confidence"] = _classify_texts(df["clean_text"])

    # Save results. Reserve a unique path, close the handle, then let pandas
    # write to it; re-opening a still-open temp file fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as tmp:
        csv_path = tmp.name
    df.to_csv(csv_path, index=False)

    # Sentiment count plot
    plt.figure(figsize=(6, 4))
    sns.countplot(data=df, x="sentiment", palette="pastel")
    plt.title("Sentiment Distribution")
    plt.tight_layout()
    sentiment_plot_path = "sentiment_flair_plot.png"
    plt.savefig(sentiment_plot_path)
    plt.close()

    # Confidence score distribution (rows without a score are left out)
    plt.figure(figsize=(6, 4))
    sns.histplot(df["confidence"].dropna(), bins=30, kde=True, color="lightblue")
    plt.title("Confidence Score Distribution")
    plt.tight_layout()
    confidence_plot_path = "confidence_flair_plot.png"
    plt.savefig(confidence_plot_path)
    plt.close()

    # Word clouds
    pos_wc_path, neg_wc_path = generate_wordclouds(df)

    return (
        f"Sentiment analysis completed on {len(df)} rows.",
        csv_path,
        sentiment_plot_path,
        confidence_plot_path,
        pos_wc_path,
        neg_wc_path,
    )


def _classify_texts(texts, batch_size=32):
    """Predict a sentiment label and confidence for each text.

    Returns two lists aligned with *texts*. Blank texts, and texts for which
    the classifier attaches no label, get ``"UNKNOWN"`` and ``NaN`` instead
    of raising ``IndexError`` on ``labels[0]``.
    """
    sentences = [Sentence(text) if text else None for text in texts]

    # Predict in mini-batches rather than one sentence per call.
    to_predict = [sentence for sentence in sentences if sentence is not None]
    if to_predict:
        classifier.predict(to_predict, mini_batch_size=batch_size)

    labels = []
    scores = []
    for sentence in sentences:
        if sentence is not None and sentence.labels:
            top = sentence.labels[0]
            labels.append(top.value)
            scores.append(top.score)
        else:
            labels.append("UNKNOWN")
            scores.append(float("nan"))
    return labels, scores

# Gradio interface
with gr.Blocks() as app:
    gr.Markdown("## FLAIR-Based Sentiment Analyzer with Word Clouds")
    gr.Markdown("Upload a CSV file with text data. This tool uses [FLAIR](https://github.com/flairNLP/flair) for sentiment classification (POSITIVE / NEGATIVE), shows confidence scores, and generates word clouds for each sentiment.")

    with gr.Row():
        file_input = gr.File(label="Upload CSV", file_types=[".csv"])
        col_dropdown = gr.Dropdown(label="Select Text Column", choices=[], interactive=True)

    def get_text_columns(file):
        """Return a dropdown update listing the object-dtype columns of *file*.

        The dropdown is emptied when no file is given, when the file cannot
        be read as CSV, or when it has no object-dtype columns.
        """
        if file is None:
            # No upload to inspect (presumably the file was cleared).
            return gr.update(choices=[], value=None)
        try:
            # Accept both upload objects (path in ``.name``) and bare paths.
            df = pd.read_csv(getattr(file, "name", file))
        except Exception:
            # Best effort: an unreadable upload just empties the dropdown.
            return gr.update(choices=[], value=None)

        text_cols = df.select_dtypes(include='object').columns.tolist()
        if not text_cols:
            # Clear the value too, so a previous selection does not linger.
            return gr.update(choices=[], value=None, label="⚠️ No text columns found")
        # Restore the normal label in case an earlier upload set the warning.
        return gr.update(choices=text_cols, value=text_cols[0], label="Select Text Column")

    file_input.change(get_text_columns, inputs=file_input, outputs=col_dropdown)

    analyze_btn = gr.Button("Run Sentiment Analysis")

    # Output components, in the order analyze_sentiment_flair returns them.
    output_text = gr.Textbox(label="Status")
    file_output = gr.File(label="Download Results CSV")
    sentiment_plot = gr.Image(label="Sentiment Distribution")
    confidence_plot = gr.Image(label="Confidence Score Distribution")
    wordcloud_pos = gr.Image(label="Positive Word Cloud")
    wordcloud_neg = gr.Image(label="Negative Word Cloud")

    analyze_btn.click(
        analyze_sentiment_flair,
        inputs=[file_input, col_dropdown],
        outputs=[output_text, file_output, sentiment_plot, confidence_plot, wordcloud_pos, wordcloud_neg]
    )

# Start serving the interface as soon as the module is executed.
# NOTE(review): share=True asks Gradio for a public share link and debug=True
# turns on its debug mode; confirm both are wanted where this is deployed.
app.launch(share=True, debug=True)