Spaces:
Sleeping
Sleeping
File size: 6,380 Bytes
ea7b9be 6743c3d cd458ad 23ba503 cd458ad 7373e67 cd458ad 0780c88 cd458ad 0780c88 cd458ad 23ba503 cd458ad 6743c3d cd458ad 7373e67 cd458ad 0780c88 cd458ad 7373e67 0780c88 7373e67 cd458ad 7373e67 0780c88 7373e67 0780c88 cd458ad 6743c3d cd458ad 6743c3d cd458ad 0780c88 cd458ad 0780c88 cd458ad 0780c88 cd458ad 7373e67 0780c88 7373e67 0780c88 7373e67 0780c88 7373e67 0780c88 7373e67 cd458ad 0780c88 cd458ad 0780c88 6743c3d 0780c88 6743c3d 0780c88 6743c3d 0780c88 6743c3d 0780c88 6743c3d 0780c88 6743c3d 0780c88 cd458ad 23ba503 7373e67 23ba503 7373e67 23ba503 cd458ad 7373e67 0780c88 cd458ad 0780c88 23ba503 7373e67 23ba503 cd458ad 0780c88 cd458ad 23ba503 0780c88 23ba503 3840bbb 0780c88 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
import gradio as gr
from transformers import pipeline
import pandas as pd
import os
import re
from filelock import FileLock
# -----------------------------
# Load Transformer Models
# -----------------------------
# English sentiment pipeline.
english_model = pipeline(
    "sentiment-analysis",
    model="siebert/sentiment-roberta-large-english",
)

# Urdu and Roman Urdu use the very same checkpoint name, so build the
# pipeline once and share the object: loading it twice only duplicates the
# weights in memory and lengthens start-up without changing any prediction.
urdu_model = pipeline(
    "sentiment-analysis",
    model="tahamueed23/fine_tuned_cardiffnlp_urdu_and_roman-urdu",
)
roman_urdu_model = urdu_model
# -----------------------------
# CSV Setup
# -----------------------------
# Path of the CSV log and of the lock file used to guard writes to it.
SAVE_FILE = "sentiment_logs.csv"
LOCK_FILE = f"{SAVE_FILE}.lock"

# On first start, seed the log with a header-only CSV so that readers always
# find the expected columns.
if not os.path.exists(SAVE_FILE):
    _empty_log = pd.DataFrame(
        columns=["Sentence", "Language", "Sentiment", "Confidence"]
    )
    _empty_log.to_csv(SAVE_FILE, index=False, encoding="utf-8-sig")
# -----------------------------
# Improved Language Detection
# -----------------------------
# Arabic-script code points: the base Arabic block, Arabic Supplement, and the
# two Arabic Presentation Forms blocks. The last range stops at U+FEFC so the
# byte-order mark U+FEFF is never mistaken for Urdu text.
_URDU_SCRIPT_RE = re.compile(
    r"[\u0600-\u06FF\u0750-\u077F\uFB50-\uFDFF\uFE70-\uFEFC]"
)

# Whole-word markers that flag Latin-script input as Roman Urdu.
_ROMAN_URDU_RE = re.compile(
    r"\b(?:hai|hain|tha|thi|parhta|parhai|acha|bura|bohot|zabardast"
    r"|sir|madam|ustad|class|parh|samajh)\b"
)


def detect_language(text):
    """Guess whether *text* is Urdu, Roman Urdu or English.

    The checks run in this order:

    1. Any Arabic-script character makes the text "Urdu".
    2. Otherwise, any whole-word marker from the Roman Urdu list
       (matched case-insensitively) makes it "Roman Urdu". Note that the
       list contains words such as "sir", "madam" and "class", so English
       sentences using them are reported as Roman Urdu too.
    3. Everything else, including the empty string, is "English".

    Args:
        text: The input sentence as a string.

    Returns:
        One of the strings "Urdu", "Roman Urdu" or "English".
    """
    if _URDU_SCRIPT_RE.search(text):
        return "Urdu"
    if _ROMAN_URDU_RE.search(text.lower()):
        return "Roman Urdu"
    return "English"
# -----------------------------
# Roman Urdu Normalization
# -----------------------------
def normalize_roman_urdu(text):
    """Lower-case Roman Urdu text and expand common shorthand spellings.

    Only standalone words are rewritten: "hy" and "h" become "hai", and
    "nhi" and "nai" become "nahi". Matching on word boundaries is essential;
    a plain substring replacement of "h" would rewrite the letter inside
    every other word (for example "acha" or "bohot").

    Args:
        text: The input sentence as a string.

    Returns:
        The lower-cased sentence with the shorthand words expanded.
    """
    text = text.lower()
    text = re.sub(r"\b(?:hy|h)\b", "hai", text)
    text = re.sub(r"\b(?:nhi|nai)\b", "nahi", text)
    return text
# -----------------------------
# Normalize Labels
# -----------------------------
def normalize_label(label):
    """Map a raw model label onto "Positive", "Negative" or "Neutral".

    The label is lower-cased and searched for the fragment "pos" first and
    "neg" second; the first hit decides the result. A label containing
    neither fragment is reported as "Neutral".

    Args:
        label: The label string produced by a sentiment pipeline.

    Returns:
        "Positive", "Negative" or "Neutral".
    """
    lowered = label.lower()
    for fragment, canonical in (("pos", "Positive"), ("neg", "Negative")):
        if fragment in lowered:
            return canonical
    return "Neutral"
# -----------------------------
# Polarity Explanation
# -----------------------------
def polarity_explanation(text, sentiment):
    """Return the canned one-line explanation for a sentiment class.

    Args:
        text: The analysed sentence. It is accepted for interface symmetry
            but is not used to build the explanation.
        sentiment: "Positive", "Negative" or "Neutral".

    Returns:
        The explanation string for the given class, or an empty string when
        the class is not one of the three known values.
    """
    canned = {
        "Positive": "Contains praise words or positive evaluation.",
        "Negative": "Contains criticism or negative expressions.",
        "Neutral": "Factual statement or balanced observation."
    }
    try:
        return canned[sentiment]
    except KeyError:
        return ""
# -----------------------------
# Ensemble Roman Urdu + Urdu
# -----------------------------
def ensemble_roman_urdu(text):
    """Pick between the Roman Urdu and Urdu pipeline results for *text*.

    Both pipelines are run and the first result of each is compared:

    * If their normalized sentiments agree, the result with the higher raw
      score is returned (the Roman Urdu result wins ties).
    * If they disagree, the Roman Urdu score is multiplied by 1.25 before
      the comparison, favouring the Roman Urdu pipeline.

    Args:
        text: The sentence to classify.

    Returns:
        The chosen pipeline result, unmodified (its "score" is the raw model
        score, not the weighted one).
    """
    roman_result = roman_urdu_model(text)[0]
    urdu_result = urdu_model(text)[0]

    models_agree = (
        normalize_label(roman_result["label"])
        == normalize_label(urdu_result["label"])
    )
    # The boost only applies on disagreement; 1.0 leaves the score untouched.
    roman_boost = 1.0 if models_agree else 1.25

    if roman_result["score"] * roman_boost >= urdu_result["score"]:
        return roman_result
    return urdu_result
# -----------------------------
# Adjust sentiment if low intensity
# -----------------------------
def adjust_for_neutral(text, sentiment, score):
    """Downgrade a weakly-scored polar prediction to "Neutral".

    Args:
        text: The analysed sentence (not used by the rule).
        sentiment: The normalized sentiment class.
        score: The confidence attached to that class.

    Returns:
        A ``(sentiment, score)`` tuple. The sentiment becomes "Neutral" when
        it was "Positive" or "Negative" with a score below 0.7; otherwise it
        is returned as given. The score is never changed.
    """
    weak_polar = sentiment in ("Positive", "Negative") and score < 0.7
    return ("Neutral" if weak_polar else sentiment), score
# -----------------------------
# Main Analysis Function
# -----------------------------
def analyze_sentiment(text, lang_hint):
    """Classify *text*, append the outcome to the CSV log and return it.

    Args:
        text: The sentence to analyse. ``None`` or a blank string yields the
            "please enter a sentence" prompt instead of a prediction.
        lang_hint: "English", "Urdu" or "Roman Urdu" to force a pipeline.
            "Auto Detect", ``None`` or an empty value triggers
            ``detect_language``. Any other value is handled like Roman Urdu.

    Returns:
        A 4-tuple ``(sentiment, confidence, explanation, log_path)`` where
        confidence is the score rounded to three decimals and rendered as a
        string, and log_path is ``SAVE_FILE``.
    """
    # Treat a missing value like a blank one rather than raising on .strip().
    if not text or not text.strip():
        return "⚠️ Please enter a sentence.", "", "", SAVE_FILE

    # A missing hint falls back to detection instead of silently landing in
    # the Roman Urdu branch below.
    if not lang_hint or lang_hint == "Auto Detect":
        lang = detect_language(text)
    else:
        lang = lang_hint

    if lang == "English":
        result = english_model(text)[0]
    elif lang == "Urdu":
        result = urdu_model(text)[0]
    else:
        # From here on `text` is the normalized form; that is also what gets
        # written to the log.
        text = normalize_roman_urdu(text)
        result = ensemble_roman_urdu(text)

    sentiment = normalize_label(result["label"])
    score = round(float(result["score"]), 3)
    sentiment, score = adjust_for_neutral(text, sentiment, score)
    explanation = polarity_explanation(text, sentiment)

    # Save logs: append just the new row instead of re-reading and rewriting
    # the whole file on every request.
    new_row = pd.DataFrame(
        [[text, lang, sentiment, score]],
        columns=["Sentence", "Language", "Sentiment", "Confidence"],
    )
    with FileLock(LOCK_FILE):
        # Emit the header only when starting a fresh (missing or empty) file.
        needs_header = (
            not os.path.exists(SAVE_FILE) or os.path.getsize(SAVE_FILE) == 0
        )
        new_row.to_csv(
            SAVE_FILE,
            mode="a",
            header=needs_header,
            index=False,
            encoding="utf-8-sig",
        )
    return sentiment, str(score), explanation, SAVE_FILE
# -----------------------------
# Show Logs
# -----------------------------
def show_logs():
    """Load the saved sentiment log for display.

    Returns:
        The contents of ``SAVE_FILE`` as a DataFrame, or an empty DataFrame
        with the four log columns when the file does not exist.
    """
    if not os.path.exists(SAVE_FILE):
        return pd.DataFrame(
            columns=["Sentence", "Language", "Sentiment", "Confidence"]
        )
    return pd.read_csv(SAVE_FILE, encoding="utf-8-sig")
# -----------------------------
# Gradio UI
# -----------------------------
# Gradio front end: builds the Blocks app `demo`, creates the input and
# output widgets, wires the two buttons to their handlers, and launches the
# app when the file is run as a script.
# NOTE(review): this copy of the file has lost its indentation, so the `with`
# bodies below will not parse as-is. Presumably the widgets following each
# `gr.Column()` belong to that column and the `.click` wiring sits inside the
# Blocks context; confirm against the original layout before re-indenting.
with gr.Blocks() as demo:
# Page heading and short description (Markdown).
gr.Markdown(
"## 🌍 Multilingual Sentiment Analysis (English • Urdu • Roman Urdu)\n"
"Detect **Positive**, **Negative**, or **Neutral** tone with confidence score.\n\n"
"🪶 Improved Roman Urdu normalization + ensemble + polarity explanation.\n"
)
with gr.Row():
with gr.Column():
# Inputs: free text, language selector (defaults to "Auto Detect"), and the
# two action buttons.
user_text = gr.Textbox(label="✍️ Enter text", placeholder="Type English, Urdu, or Roman Urdu...")
lang_dropdown = gr.Dropdown(
["Auto Detect", "English", "Urdu", "Roman Urdu"],
value="Auto Detect", label="🌐 Language"
)
btn_analyze = gr.Button("🔍 Analyze Sentiment")
btn_show = gr.Button("📂 Show Saved Logs")
with gr.Column():
# Outputs: one widget per element of the tuple returned by analyze_sentiment.
out_sent = gr.Textbox(label="Sentiment")
out_conf = gr.Textbox(label="Confidence (0–1)")
out_exp = gr.Textbox(label="Polarity Explanation")
out_file = gr.File(label="⬇️ Download Logs (.csv)", type="filepath")
# Read-only table used to display the saved log.
logs_df = gr.Dataframe(
headers=["Sentence", "Language", "Sentiment", "Confidence"],
label="🧾 Sentiment Logs", interactive=False
)
# Analyze button: passes the text box and dropdown values to
# analyze_sentiment and spreads its four return values over the outputs.
btn_analyze.click(analyze_sentiment,
inputs=[user_text, lang_dropdown],
outputs=[out_sent, out_conf, out_exp, out_file])
# Show-logs button: fills the table with whatever show_logs returns.
btn_show.click(show_logs, outputs=[logs_df])
# Start the app only when this file is executed directly.
if __name__ == "__main__":
demo.launch()
|