import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re
from filelock import FileLock

# -----------------------------
# Load Transformer Models
# -----------------------------
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english"
)

urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu"
)

# Urdu and Roman Urdu use the same fine-tuned checkpoint, so reuse the loaded
# pipeline rather than loading the weights a second time.
roman_urdu_model = urdu_model

# -----------------------------
# CSV Setup
# -----------------------------
SAVE_FILE = "sentiment_logs.csv"
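# FileLock uses this sidecar file to serialize concurrent writes to the CSV log.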
LOCK_FILE = SAVE_FILE + ".lock"

if not os.path.exists(SAVE_FILE):
    pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"]).to_csv(
        SAVE_FILE, index=False, encoding="utf-8-sig"
    )

# -----------------------------
# Improved Language Detection
# -----------------------------
def detect_language(text):
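    # Script-based heuristic: any Arabic-script character => Urdu; a few common
    # Roman Urdu tokens => Roman Urdu; otherwise default to English.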
    urdu_script = re.compile(r"[\u0600-\u06FF]")
    if urdu_script.search(text):
        return "Urdu"

    roman_urdu_patterns = [
        r"\b(hai|hain|tha|thi|parhta|parhai|acha|bura|bohot|zabardast)\b",
        r"\b(sir|madam|ustad|class|parh|samajh)\b",
    ]

    text_l = text.lower()
    for p in roman_urdu_patterns:
        if re.search(p, text_l):
            return "Roman Urdu"

    return "English"

# -----------------------------
# Roman Urdu Normalization
# -----------------------------
def normalize_roman_urdu(text):
    # Map common Roman Urdu spelling variants onto the forms the model expects.
    # Word boundaries are required here: a bare str.replace("h", "hai") would
    # corrupt every word containing the letter "h".
    text = text.lower()
    text = re.sub(r"\bhy\b|\bh\b", "hai", text)      # "hy" / bare "h" -> "hai"
    text = re.sub(r"\bnhi\b|\bnai\b", "nahi", text)  # "nhi" / "nai" -> "nahi"
    return text

# -----------------------------
# Normalize Labels
# -----------------------------
def normalize_label(label):
    # Collapse model-specific label strings onto Positive / Negative / Neutral.
    label = label.lower()
    if "pos" in label:
        return "Positive"
    elif "neg" in label:
        return "Negative"
    else:
        return "Neutral"

# -----------------------------
# Polarity Explanation
# -----------------------------
def polarity_explanation(text, sentiment):
    explanations = {
        "Positive": "Contains praise words or positive evaluation.",
        "Negative": "Contains criticism or negative expressions.",
        "Neutral": "Factual statement or balanced observation."
    }
    return explanations.get(sentiment, "")

# -----------------------------
# Ensemble Roman Urdu + Urdu
# -----------------------------
def ensemble_roman_urdu(text):
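    # Score the text with both pipelines. If they agree, keep the higher-confidence
    # result; if they disagree, weight the Roman Urdu prediction 25% higher.
    # (Both names are currently bound to the same checkpoint, so the results coincide.)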
    ru = roman_urdu_model(text)[0]
    ur = urdu_model(text)[0]

    ru_sent, ur_sent = normalize_label(ru["label"]), normalize_label(ur["label"])

    if ru_sent == ur_sent:
        return ru if ru["score"] >= ur["score"] else ur

    # Weight Roman Urdu higher for Roman Urdu input
    weight_ru = ru["score"] * 1.25
    weight_ur = ur["score"]
    return ru if weight_ru >= weight_ur else ur

# -----------------------------
# Adjust sentiment if low intensity
# -----------------------------
def adjust_for_neutral(text, sentiment, score):
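    # Treat low-confidence Positive/Negative calls (score < 0.7) as Neutral
    # instead of committing to a weak polarity.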
    if sentiment in ["Positive", "Negative"] and score < 0.7:
        return "Neutral", score
    return sentiment, score

# -----------------------------
# Main Analysis Function
# -----------------------------
def analyze_sentiment(text, lang_hint):
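    # Route the sentence to the matching model, normalize the label, apply the
    # low-confidence Neutral fallback, and append the result to the CSV log.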
    if not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

    lang = lang_hint if lang_hint != "Auto Detect" else detect_language(text)

    if lang == "English":
        result = english_model(text)[0]
    elif lang == "Urdu":
        result = urdu_model(text)[0]
    else:
        # Normalize Roman Urdu spellings for scoring, but keep the original
        # sentence intact so the log records what the user actually typed.
        result = ensemble_roman_urdu(normalize_roman_urdu(text))

    sentiment = normalize_label(result["label"])
    score = round(float(result["score"]), 3)
    sentiment, score = adjust_for_neutral(text, sentiment, score)
    explanation = polarity_explanation(text, sentiment)

    # Save logs
    with FileLock(LOCK_FILE):
        df = pd.read_csv(SAVE_FILE, encoding="utf-8-sig") \
            if os.path.exists(SAVE_FILE) else pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])
        new_row = pd.DataFrame([[text, lang, sentiment, score]],
                               columns=["Sentence", "Language", "Sentiment", "Confidence"])
        df = pd.concat([df, new_row], ignore_index=True)
        df.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")

    return sentiment, str(score), explanation, SAVE_FILE

# -----------------------------
# Show Logs
# -----------------------------
def show_logs():
    if os.path.exists(SAVE_FILE):
        return pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
    else:
        return pd.DataFrame(columns=["Sentence", "Language", "Sentiment", "Confidence"])

# -----------------------------
# Gradio UI
# -----------------------------
with gr.Blocks() as demo:
    gr.Markdown(
        "## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
        "Detect **Positive**, **Negative**, or **Neutral** tone with confidence score.\n\n"
        "🪶 Improved Roman Urdu normalization + ensemble + polarity explanation.\n"
    )

    with gr.Row():
        with gr.Column():
            user_text = gr.Textbox(label="✍️ Enter text", placeholder="Type English, Urdu, or Roman Urdu...")
            lang_dropdown = gr.Dropdown(
                ["Auto Detect", "English", "Urdu", "Roman Urdu"],
                value="Auto Detect", label="🌐 Language"
            )
            btn_analyze = gr.Button("🔍 Analyze Sentiment")
            btn_show = gr.Button("📂 Show Saved Logs")

        with gr.Column():
            out_sent = gr.Textbox(label="Sentiment")
            out_conf = gr.Textbox(label="Confidence (0–1)")
            out_exp = gr.Textbox(label="Polarity Explanation")
            out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")

    logs_df = gr.Dataframe(
        headers=["Sentence", "Language", "Sentiment", "Confidence"],
        label="🧾 Sentiment Logs", interactive=False
    )

    btn_analyze.click(analyze_sentiment,
                      inputs=[user_text, lang_dropdown],
                      outputs=[out_sent, out_conf, out_exp, out_file])

    btn_show.click(show_logs, outputs=[logs_df])

if __name__ == "__main__":
    demo.launch()
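
# Run this file directly (e.g. `python app.py`; filename assumed) and Gradio
# prints a local URL for the interface.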