DivYonko committed on
Commit Β·
b9d31ba
1
Parent(s): 1baaf30
Replace ML ensemble with pure keyword sentiment engine
Browse files- Dockerfile +0 -3
- README.md +7 -5
- ml/sentiment_model.py +373 -264
- requirements.txt +1 -6
Dockerfile
CHANGED
|
@@ -13,9 +13,6 @@ RUN pip install --no-cache-dir -r requirements.txt
|
|
| 13 |
|
| 14 |
COPY . .
|
| 15 |
|
| 16 |
-
# Suppress Streamlit's file watcher scanning transformers (harmless but noisy)
|
| 17 |
-
ENV STREAMLIT_SERVER_FILE_WATCHER_TYPE=none
|
| 18 |
-
|
| 19 |
EXPOSE 7860
|
| 20 |
|
| 21 |
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
|
|
|
|
| 13 |
|
| 14 |
COPY . .
|
| 15 |
|
|
|
|
|
|
|
|
|
|
| 16 |
EXPOSE 7860
|
| 17 |
|
| 18 |
HEALTHCHECK CMD curl --fail http://localhost:7860/_stcore/health
|
README.md
CHANGED
|
@@ -15,12 +15,14 @@ Real-time Hinglish sentiment and topic analysis for YouTube live streams.
|
|
| 15 |
|
| 16 |
## Features
|
| 17 |
|
| 18 |
-
- Real-time chat scraping via
|
| 19 |
-
- Sentiment classification (Positive / Neutral / Negative) using a
|
| 20 |
-
-
|
| 21 |
-
-
|
| 22 |
-
-
|
|
|
|
| 23 |
- Topic classification (Appreciation / Question / Promo / Spam / MCQ Answer / General)
|
|
|
|
| 24 |
- Interactive Streamlit dashboard with live auto-refresh
|
| 25 |
- Start/stop scraper directly from the UI
|
| 26 |
- Multi-stream comparison (up to 5 streams)
|
|
|
|
| 15 |
|
| 16 |
## Features
|
| 17 |
|
| 18 |
+
- Real-time chat scraping via YouTube Data API v3
|
| 19 |
+
- Sentiment classification (Positive / Neutral / Negative) using a pure keyword engine
|
| 20 |
+
- Expanded Hinglish + English + regional slang keyword sets
|
| 21 |
+
- Negation handling ("nahi accha" β Negative)
|
| 22 |
+
- Intensifier boost ("bahut accha" β higher confidence)
|
| 23 |
+
- Emoji sentiment scoring
|
| 24 |
- Topic classification (Appreciation / Question / Promo / Spam / MCQ Answer / General)
|
| 25 |
+
- Action type classification (28 fine-grained categories, fully keyword-based)
|
| 26 |
- Interactive Streamlit dashboard with live auto-refresh
|
| 27 |
- Start/stop scraper directly from the UI
|
| 28 |
- Multi-stream comparison (up to 5 streams)
|
ml/sentiment_model.py
CHANGED
|
@@ -1,305 +1,414 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
from __future__ import annotations
|
| 2 |
|
| 3 |
import re
|
| 4 |
-
import threading
|
| 5 |
-
from functools import lru_cache
|
| 6 |
|
| 7 |
import emoji
|
| 8 |
-
import torch
|
| 9 |
-
import torch.nn.functional as F
|
| 10 |
-
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
# ββ Model paths ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 14 |
-
MURIL_MODEL = "./new_trained_data/muril-sentimix"
|
| 15 |
-
XLMR_MODEL = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
|
| 16 |
-
MULTI_MODEL = "tabularisai/multilingual-sentiment-analysis"
|
| 17 |
-
|
| 18 |
-
LABELS = ["Negative", "Neutral", "Positive"]
|
| 19 |
-
|
| 20 |
-
# Weights
|
| 21 |
-
MURIL_WEIGHT = 0.40
|
| 22 |
-
XLMR_WEIGHT = 0.35
|
| 23 |
-
MULTI_WEIGHT = 0.25
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
# ββ Lazy loading βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 27 |
-
_lock = threading.Lock()
|
| 28 |
-
|
| 29 |
-
_muril_tokenizer = _muril_model = None
|
| 30 |
-
_xlmr_tokenizer = _xlmr_model = None
|
| 31 |
-
_multi_tokenizer = _multi_model = None
|
| 32 |
-
_models_loaded = False
|
| 33 |
-
_load_error: Exception | None = None
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
def _load_models():
|
| 37 |
-
global _muril_tokenizer, _muril_model
|
| 38 |
-
global _xlmr_tokenizer, _xlmr_model
|
| 39 |
-
global _multi_tokenizer, _multi_model
|
| 40 |
-
global _models_loaded, _load_error
|
| 41 |
-
|
| 42 |
-
if _models_loaded:
|
| 43 |
-
return
|
| 44 |
-
|
| 45 |
-
with _lock:
|
| 46 |
-
if _models_loaded:
|
| 47 |
-
return
|
| 48 |
-
|
| 49 |
-
print("[sentiment] Loading models...")
|
| 50 |
-
try:
|
| 51 |
-
_muril_tokenizer = AutoTokenizer.from_pretrained(MURIL_MODEL)
|
| 52 |
-
_muril_model = AutoModelForSequenceClassification.from_pretrained(MURIL_MODEL)
|
| 53 |
-
print(f"[sentiment] MuRIL loaded β id2label: {_muril_model.config.id2label}")
|
| 54 |
-
|
| 55 |
-
_xlmr_tokenizer = AutoTokenizer.from_pretrained(XLMR_MODEL)
|
| 56 |
-
_xlmr_model = AutoModelForSequenceClassification.from_pretrained(XLMR_MODEL)
|
| 57 |
-
print(f"[sentiment] XLM-R loaded β id2label: {_xlmr_model.config.id2label}")
|
| 58 |
-
|
| 59 |
-
_multi_tokenizer = AutoTokenizer.from_pretrained(MULTI_MODEL)
|
| 60 |
-
_multi_model = AutoModelForSequenceClassification.from_pretrained(MULTI_MODEL)
|
| 61 |
-
print(f"[sentiment] Multilingual loaded β id2label: {_multi_model.config.id2label}")
|
| 62 |
-
|
| 63 |
-
_muril_model.eval()
|
| 64 |
-
_xlmr_model.eval()
|
| 65 |
-
_multi_model.eval()
|
| 66 |
-
|
| 67 |
-
if torch.cuda.is_available():
|
| 68 |
-
_muril_model.to("cuda")
|
| 69 |
-
_xlmr_model.to("cuda")
|
| 70 |
-
_multi_model.to("cuda")
|
| 71 |
-
|
| 72 |
-
_models_loaded = True
|
| 73 |
-
print("[sentiment] All models ready β")
|
| 74 |
-
|
| 75 |
-
except Exception as exc:
|
| 76 |
-
_load_error = exc
|
| 77 |
-
print(f"[sentiment] ERROR loading models: {exc}")
|
| 78 |
-
raise
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
def _device():
|
| 82 |
-
if not _models_loaded:
|
| 83 |
-
_load_models()
|
| 84 |
-
return next(_muril_model.parameters()).device
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
# ββ Text normalization βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 88 |
-
def _normalize_repeated_chars(text: str) -> str:
|
| 89 |
-
return re.sub(r"(.)\1{2,}", r"\1\1", text)
|
| 90 |
|
| 91 |
|
| 92 |
# ββ Emoji scoring ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
|
| 97 |
-
def _emoji_score(text: str):
|
| 98 |
-
|
|
|
|
| 99 |
for ch in text:
|
| 100 |
if emoji.is_emoji(ch):
|
| 101 |
name = emoji.demojize(ch)
|
| 102 |
-
if any(k in name for k in
|
| 103 |
-
score += 0.
|
| 104 |
-
elif any(k in name for k in
|
| 105 |
-
score -= 0.
|
| 106 |
-
return score
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
# ββ
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
| 113 |
-
"
|
| 114 |
-
"
|
| 115 |
-
|
| 116 |
-
"
|
| 117 |
-
"
|
| 118 |
-
"
|
| 119 |
-
"
|
| 120 |
-
"
|
| 121 |
-
"ekdum": "absolutely",
|
| 122 |
-
"shandar": "splendid",
|
| 123 |
-
"lajawaab": "outstanding",
|
| 124 |
-
"waah": "wow great",
|
| 125 |
-
"wah": "wow great",
|
| 126 |
-
"superb": "excellent",
|
| 127 |
-
"osm": "awesome",
|
| 128 |
-
"awsm": "awesome",
|
| 129 |
-
"gr8": "great",
|
| 130 |
-
"lajawab": "outstanding",
|
| 131 |
-
"dhansu": "awesome",
|
| 132 |
-
"fatafat": "excellent quick",
|
| 133 |
-
"mazza": "fun enjoyable",
|
| 134 |
-
"maja": "fun enjoyable",
|
| 135 |
-
"acha": "good",
|
| 136 |
-
"accha": "good",
|
| 137 |
-
"badhiya": "very good",
|
| 138 |
-
"shukriya": "thank you grateful",
|
| 139 |
-
"dhanyawad": "thank you grateful",
|
| 140 |
-
"love": "love positive",
|
| 141 |
-
"pyaar": "love positive",
|
| 142 |
-
|
| 143 |
-
# Negative
|
| 144 |
-
"bakwas": "nonsense bad",
|
| 145 |
-
"faltu": "useless bad",
|
| 146 |
-
"bekar": "useless bad",
|
| 147 |
-
"ghatiya": "terrible bad",
|
| 148 |
-
"wahiyat": "awful bad",
|
| 149 |
-
"bura": "bad negative",
|
| 150 |
-
"kharab": "bad negative",
|
| 151 |
-
"boring": "boring negative",
|
| 152 |
-
"bekaar": "useless bad",
|
| 153 |
-
"chutiya": "stupid offensive",
|
| 154 |
-
"ullu": "fool negative",
|
| 155 |
-
"pagal": "crazy negative",
|
| 156 |
-
"besharam": "shameless negative",
|
| 157 |
-
"nafrat": "hate negative",
|
| 158 |
-
"gussa": "angry negative",
|
| 159 |
-
"naraaz": "angry upset",
|
| 160 |
-
"dukh": "sad negative",
|
| 161 |
-
"takleef": "pain negative",
|
| 162 |
-
"mushkil": "difficult negative",
|
| 163 |
-
"problem": "problem negative",
|
| 164 |
}
|
| 165 |
|
|
|
|
|
|
|
| 166 |
|
| 167 |
-
def _preprocess(text: str) -> str:
|
| 168 |
-
text = _normalize_repeated_chars(text)
|
| 169 |
|
| 170 |
-
|
| 171 |
-
|
| 172 |
-
replace=lambda ch, data_dict: f" {emoji.demojize(ch).strip(':')} " if emoji.is_emoji(ch) else ch
|
| 173 |
-
)
|
| 174 |
|
| 175 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 183 |
|
| 184 |
-
|
| 185 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 186 |
|
| 187 |
-
return text.strip()
|
| 188 |
|
|
|
|
| 189 |
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
|
|
|
|
|
|
|
|
|
| 197 |
|
| 198 |
|
| 199 |
-
|
| 200 |
-
stripped = text.strip().lower()
|
| 201 |
|
| 202 |
-
|
| 203 |
-
|
|
|
|
| 204 |
|
| 205 |
-
|
|
|
|
|
|
|
| 206 |
|
| 207 |
-
|
| 208 |
-
|
| 209 |
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 214 |
|
| 215 |
-
|
|
|
|
|
|
|
|
|
|
| 216 |
|
|
|
|
|
|
|
|
|
|
| 217 |
|
| 218 |
-
#
|
| 219 |
-
|
| 220 |
-
_CANONICAL = ["Negative", "Neutral", "Positive"]
|
| 221 |
|
| 222 |
-
# Normalise a label string so casing/spacing differences don't matter β used in _align_probs
|
| 223 |
|
|
|
|
| 224 |
|
| 225 |
-
def
|
| 226 |
"""
|
| 227 |
-
|
| 228 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 229 |
"""
|
| 230 |
-
|
| 231 |
-
_5CLASS_MAP = {
|
| 232 |
-
"very negative": 0, "negative": 0, "neg": 0,
|
| 233 |
-
"neutral": 1, "neu": 1,
|
| 234 |
-
"positive": 2, "pos": 2, "very positive": 2,
|
| 235 |
-
}
|
| 236 |
-
_3CLASS_MAP = {
|
| 237 |
-
"negative": 0, "neg": 0,
|
| 238 |
-
"neutral": 1, "neu": 1,
|
| 239 |
-
"positive": 2, "pos": 2,
|
| 240 |
-
}
|
| 241 |
-
label_map = _5CLASS_MAP if len(id2label) == 5 else _3CLASS_MAP
|
| 242 |
-
try:
|
| 243 |
-
aligned = torch.zeros(3, device=probs.device)
|
| 244 |
-
for native_idx, label in id2label.items():
|
| 245 |
-
canonical_idx = label_map[label.lower()]
|
| 246 |
-
aligned[canonical_idx] += probs[native_idx]
|
| 247 |
-
return aligned
|
| 248 |
-
except (KeyError, IndexError):
|
| 249 |
-
print(f"[sentiment] WARNING: could not align labels {id2label}, using raw order")
|
| 250 |
-
return probs[:3]
|
| 251 |
-
|
| 252 |
-
|
| 253 |
-
def _infer_aligned(tokenizer, model, text: str) -> torch.Tensor:
|
| 254 |
-
"""Run inference and return probs aligned to [Negative, Neutral, Positive]."""
|
| 255 |
-
device = _device()
|
| 256 |
-
|
| 257 |
-
inputs = tokenizer(
|
| 258 |
-
text,
|
| 259 |
-
return_tensors="pt",
|
| 260 |
-
truncation=True,
|
| 261 |
-
max_length=128,
|
| 262 |
-
padding=True,
|
| 263 |
-
).to(device)
|
| 264 |
-
|
| 265 |
-
with torch.no_grad():
|
| 266 |
-
logits = model(**inputs).logits
|
| 267 |
-
|
| 268 |
-
probs = F.softmax(logits, dim=-1).squeeze()
|
| 269 |
-
return _align_probs(probs, model.config.id2label)
|
| 270 |
-
|
| 271 |
-
|
| 272 |
-
# ββ Ensemble βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 273 |
-
@lru_cache(maxsize=512)
|
| 274 |
-
def _ensemble(text):
|
| 275 |
-
_load_models()
|
| 276 |
-
|
| 277 |
-
p_muril = _infer_aligned(_muril_tokenizer, _muril_model, text)
|
| 278 |
-
p_xlmr = _infer_aligned(_xlmr_tokenizer, _xlmr_model, text)
|
| 279 |
-
p_multi = _infer_aligned(_multi_tokenizer, _multi_model, text)
|
| 280 |
-
|
| 281 |
-
probs = MURIL_WEIGHT * p_muril + XLMR_WEIGHT * p_xlmr + MULTI_WEIGHT * p_multi
|
| 282 |
-
|
| 283 |
-
conf, idx = torch.max(probs, dim=0)
|
| 284 |
-
|
| 285 |
-
return _CANONICAL[idx.item()], conf.item()
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
# ββ Public API βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 289 |
-
def predict_sentiment(text: str):
|
| 290 |
-
|
| 291 |
-
fast = _fast_path(text)
|
| 292 |
-
if fast:
|
| 293 |
-
return fast
|
| 294 |
-
|
| 295 |
-
clean = _preprocess(text)
|
| 296 |
-
|
| 297 |
-
if not clean:
|
| 298 |
return "Neutral", 0.55
|
| 299 |
|
| 300 |
-
label, conf =
|
| 301 |
|
| 302 |
-
|
| 303 |
-
|
|
|
|
| 304 |
|
| 305 |
-
return label,
|
|
|
|
| 1 |
+
# -*- coding: utf-8 -*-
|
| 2 |
+
"""
|
| 3 |
+
ml/sentiment_model.py
|
| 4 |
+
=====================
|
| 5 |
+
Pure keyword/rule-based sentiment classifier for YouTube live-chat comments.
|
| 6 |
+
No ML models are loaded β classification is entirely keyword/regex-based.
|
| 7 |
+
|
| 8 |
+
Approach
|
| 9 |
+
--------
|
| 10 |
+
1. Emoji scoring β positive/negative emoji characters boost confidence
|
| 11 |
+
2. Negation check β "nahi accha" flips Positive β Negative
|
| 12 |
+
3. Intensifier boost β "bahut accha" raises confidence
|
| 13 |
+
4. Keyword matching β expanded Hinglish + English + regional + typo variants
|
| 14 |
+
5. Fallback β Neutral at 0.55 if nothing fires
|
| 15 |
+
|
| 16 |
+
Public API
|
| 17 |
+
----------
|
| 18 |
+
predict_sentiment(text: str) -> tuple[str, float]
|
| 19 |
+
Returns (label, confidence) where label β {"Positive", "Neutral", "Negative"}
|
| 20 |
+
and confidence β [0.50, 0.95].
|
| 21 |
+
"""
|
| 22 |
+
|
| 23 |
from __future__ import annotations
|
| 24 |
|
| 25 |
import re
|
|
|
|
|
|
|
| 26 |
|
| 27 |
import emoji
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
|
| 30 |
# ββ Emoji scoring ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 31 |
+
# Positive and negative emoji keyword sets (matched against demojized names)
|
| 32 |
+
_EMOJI_POS_KW = {
|
| 33 |
+
"love", "fire", "happy", "laugh", "win", "cool", "best", "heart",
|
| 34 |
+
"smile", "star", "clap", "pray", "sparkle", "sun", "rainbow",
|
| 35 |
+
"thumbs_up", "raised_hands", "partying", "grinning", "beaming",
|
| 36 |
+
"smiling", "joy", "hundred", "muscle", "trophy", "crown",
|
| 37 |
+
}
|
| 38 |
+
_EMOJI_NEG_KW = {
|
| 39 |
+
"angry", "sad", "cry", "worst", "bad", "hate", "skull", "vomit",
|
| 40 |
+
"rage", "broken", "disappointed", "thumbs_down", "weary", "tired",
|
| 41 |
+
"loudly_crying", "fearful", "anguished", "confounded", "persevere",
|
| 42 |
+
"unamused", "expressionless", "nauseated", "sneezing",
|
| 43 |
+
}
|
| 44 |
|
| 45 |
|
| 46 |
+
def _emoji_score(text: str) -> float:
    """Return an emoji-based sentiment adjustment, clamped to [-0.4, 0.4].

    Each positive emoji (matched by demojized name against _EMOJI_POS_KW)
    adds 0.15; each negative emoji subtracts 0.15.
    """
    total = 0.0
    for char in text:
        if not emoji.is_emoji(char):
            continue
        demojized = emoji.demojize(char)
        if any(kw in demojized for kw in _EMOJI_POS_KW):
            total += 0.15
        elif any(kw in demojized for kw in _EMOJI_NEG_KW):
            total -= 0.15
    # Clamp so a wall of emojis cannot dominate the keyword signal.
    return min(max(total, -0.4), 0.4)
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
# ββ Negation words βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 60 |
+
# These flip the sentiment of a keyword that follows within a short window.
|
| 61 |
+
_NEGATION_WORDS: set[str] = {
|
| 62 |
+
# Hindi / Hinglish
|
| 63 |
+
"nahi", "nhi", "nahin", "na", "mat", "naa", "nope",
|
| 64 |
+
"bilkul nahi", "kabhi nahi", "kabhi nhi",
|
| 65 |
+
# English
|
| 66 |
+
"not", "no", "never", "neither", "nor", "without",
|
| 67 |
+
"don't", "dont", "doesn't", "doesnt", "didn't", "didnt",
|
| 68 |
+
"can't", "cant", "won't", "wont", "isn't", "isnt",
|
| 69 |
+
"wasn't", "wasnt", "aren't", "arent", "weren't", "werent",
|
| 70 |
+
"hardly", "barely", "scarcely",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
}
|
| 72 |
|
| 73 |
+
# Window size: how many words before a sentiment word to check for negation
|
| 74 |
+
_NEGATION_WINDOW = 3
|
| 75 |
|
|
|
|
|
|
|
| 76 |
|
| 77 |
+
def _is_negated(word_list: list[str], sentiment_idx: int) -> bool:
    """Return True if a negation word sits near the sentiment word.

    Checks up to _NEGATION_WINDOW words before sentiment_idx and up to
    2 words after it, covering both:
      - pre-negation:  "nahi accha tha"  (negation before sentiment word)
      - post-negation: "boring nahi tha" (negation after sentiment word)
    """
    lo = max(0, sentiment_idx - _NEGATION_WINDOW)
    if any(tok in _NEGATION_WORDS for tok in word_list[lo:sentiment_idx]):
        return True
    # Trailing window is deliberately tighter (2 words).
    trailing = word_list[sentiment_idx + 1: sentiment_idx + 3]
    return any(tok in _NEGATION_WORDS for tok in trailing)
|
| 92 |
+
|
| 93 |
+
|
| 94 |
+
# ββ Intensifier words ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 95 |
+
# These amplify the confidence when they appear near a sentiment word.
|
| 96 |
+
_INTENSIFIERS: dict[str, float] = {
|
| 97 |
+
# Hindi / Hinglish
|
| 98 |
+
"bahut": 0.10, # very
|
| 99 |
+
"bohot": 0.10,
|
| 100 |
+
"bht": 0.08,
|
| 101 |
+
"ekdum": 0.12, # absolutely
|
| 102 |
+
"bilkul": 0.10, # completely
|
| 103 |
+
"itna": 0.08, # this much
|
| 104 |
+
"kitna": 0.06,
|
| 105 |
+
"zyada": 0.08, # more/too much
|
| 106 |
+
"bohat": 0.10,
|
| 107 |
+
"atyant": 0.10, # extremely (formal Hindi)
|
| 108 |
+
"sampurn": 0.08, # completely
|
| 109 |
+
# English
|
| 110 |
+
"very": 0.08,
|
| 111 |
+
"too": 0.08,
|
| 112 |
+
"so": 0.06,
|
| 113 |
+
"super": 0.10,
|
| 114 |
+
"ultra": 0.10,
|
| 115 |
+
"extremely": 0.12,
|
| 116 |
+
"absolutely": 0.12,
|
| 117 |
+
"totally": 0.10,
|
| 118 |
+
"really": 0.08,
|
| 119 |
+
"truly": 0.08,
|
| 120 |
+
"highly": 0.08,
|
| 121 |
+
"deeply": 0.08,
|
| 122 |
+
"insanely": 0.10,
|
| 123 |
+
"incredibly": 0.10,
|
| 124 |
+
"genuinely": 0.08,
|
| 125 |
+
}
|
| 126 |
|
| 127 |
+
_INTENSIFIER_WINDOW = 2
|
| 128 |
+
|
| 129 |
+
|
| 130 |
+
def _intensifier_boost(word_list: list[str], sentiment_idx: int) -> float:
    """Sum intensifier weights in the _INTENSIFIER_WINDOW words before
    sentiment_idx, capped at 0.15 so stacked intensifiers cannot inflate
    confidence unboundedly."""
    lo = max(0, sentiment_idx - _INTENSIFIER_WINDOW)
    total = 0.0
    for tok in word_list[lo:sentiment_idx]:
        total += _INTENSIFIERS.get(tok, 0.0)
    return min(total, 0.15)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
# ββ Positive keyword set βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 139 |
+
_POS_WORDS: set[str] = {
|
| 140 |
+
# ββ Core Hinglish slang ββ
|
| 141 |
+
"mast", "jhakaas", "kadak", "zabardast", "kamaal", "bindaas",
|
| 142 |
+
"shandar", "lajawaab", "lajawab", "lajaab", "waah", "wah",
|
| 143 |
+
"dhansu", "badhiya", "badiya", "maja", "mazza", "maza",
|
| 144 |
+
"acha", "accha", "achha", "acha", "sahi", "sach",
|
| 145 |
+
"shukriya", "dhanyawad", "dhanyavaad", "meherbani", "shukran",
|
| 146 |
+
"pyaar", "pyar", "khushi", "khush",
|
| 147 |
+
"fatafat", "jaldi",
|
| 148 |
+
|
| 149 |
+
# ββ Typo / abbreviation variants ββ
|
| 150 |
+
"osm", "awsm", "awsom", "awsome", "amzing", "amazng",
|
| 151 |
+
"gr8", "grt", "gr9", "fab", "fabbb",
|
| 152 |
+
"superrr", "amazinggg", "besttt", "niceee", "gooddd",
|
| 153 |
+
"thku", "thnku", "thnkuu", "thnkyou", "thanku", "thankyou",
|
| 154 |
+
"thnk", "thnq", "thnks", "thnx", "tysm", "tqsm", "thx", "ty",
|
| 155 |
+
"ty", "tyvm", "tyvmm",
|
| 156 |
+
|
| 157 |
+
# ββ English positive ββ
|
| 158 |
+
"amazing", "awesome", "excellent", "wonderful", "fantastic",
|
| 159 |
+
"brilliant", "outstanding", "exceptional", "magnificent",
|
| 160 |
+
"superb", "perfect", "great", "good", "nice", "beautiful",
|
| 161 |
+
"lovely", "loved", "love", "best", "better",
|
| 162 |
+
"helpful", "useful", "informative", "fruitful", "motivating",
|
| 163 |
+
"motivational", "inspiring", "inspired", "insightful",
|
| 164 |
+
"clear", "clarity", "simple", "easy", "smooth",
|
| 165 |
+
"thankful", "grateful", "blessed", "proud",
|
| 166 |
+
"happy", "glad", "pleased", "satisfied", "content",
|
| 167 |
+
"enjoy", "enjoyed", "enjoying", "fun", "interesting",
|
| 168 |
+
"impressive", "impressed", "incredible", "unbelievable",
|
| 169 |
+
"top", "topnotch", "firstclass", "worldclass",
|
| 170 |
+
"recommend", "recommended", "worth", "worthy",
|
| 171 |
+
"thanks", "thank", "appreciate", "appreciated",
|
| 172 |
+
"respect", "salute", "legend", "goat", "king", "queen",
|
| 173 |
+
"bestest", "bestttttt", "much", "op", "lit",
|
| 174 |
+
|
| 175 |
+
# ββ Regional / South Indian Hinglish ββ
|
| 176 |
+
"semma", # Tamil slang for awesome
|
| 177 |
+
"mass", # Tamil/Telugu slang for impressive
|
| 178 |
+
"vera level", # Tamil slang for next level
|
| 179 |
+
"sema", # variant of semma
|
| 180 |
+
"bindass", # variant of bindaas
|
| 181 |
+
"dum", # strength/power (positive context)
|
| 182 |
+
"dhamakedaar", # explosive/amazing
|
| 183 |
+
"dhamaka", # blast/amazing
|
| 184 |
+
"toofan", # storm (used positively)
|
| 185 |
+
"jalwa", # aura/presence (positive)
|
| 186 |
+
"josh", # enthusiasm/energy
|
| 187 |
+
"full josh",
|
| 188 |
+
"paisa vasool", # worth the money
|
| 189 |
+
"makkhan", # butter smooth (positive)
|
| 190 |
+
"solid", # solid/strong (positive)
|
| 191 |
+
"tight", # tight/solid (positive slang)
|
| 192 |
+
"fire", # fire (positive slang)
|
| 193 |
+
"goated", # GOAT-ed (positive slang)
|
| 194 |
+
"based", # based (positive slang)
|
| 195 |
+
"valid", # valid (positive slang)
|
| 196 |
+
"clean", # clean explanation
|
| 197 |
+
|
| 198 |
+
# ββ Gratitude phrases (single tokens after normalization) ββ
|
| 199 |
+
"shukriyaa", "shukriyaaa", "dhanyawaad", "dhanyawaaad",
|
| 200 |
+
"abhar", # gratitude (formal Hindi)
|
| 201 |
+
"aabhar",
|
| 202 |
+
|
| 203 |
+
# ββ Common live chat positives ββ
|
| 204 |
+
"woww", "wowww", "woah", "whoa", "yay", "yayy",
|
| 205 |
+
"haha", "hahaha", "lol", "lmao", # laughter = positive
|
| 206 |
+
"clap", "claps", "bravo", "chappal", # chappal = clap in some contexts
|
| 207 |
+
"heart", "hearts",
|
| 208 |
+
"100", "1000", # "100%" positive
|
| 209 |
+
}
|
| 210 |
|
| 211 |
+
# ββ Negative keyword set βββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 212 |
+
_NEG_WORDS: set[str] = {
|
| 213 |
+
# ββ Core Hinglish slang ββ
|
| 214 |
+
"bakwas", "bakwaas", "bakwaaas",
|
| 215 |
+
"faltu", "faltuu",
|
| 216 |
+
"bekar", "bekaar", "bekaaar",
|
| 217 |
+
"ghatiya", "ghatiiya",
|
| 218 |
+
"wahiyat", "wahiyaat",
|
| 219 |
+
"bura", "buraa",
|
| 220 |
+
"kharab", "kharaaab",
|
| 221 |
+
"boring", "borring", "booring",
|
| 222 |
+
"ullu", "pagal", "paagal",
|
| 223 |
+
"besharam", "besharaam",
|
| 224 |
+
"nafrat", "gussa", "naraaz",
|
| 225 |
+
"dukh", "takleef", "mushkil",
|
| 226 |
+
"uruttu", "battamizi", "battameezi",
|
| 227 |
+
"natak", "nautanki",
|
| 228 |
+
"dhoka", "dhokha", "jhooth", "jhoota",
|
| 229 |
+
"dikhawa", "dikhaawa",
|
| 230 |
+
"beizzati", "beizzatii", "bezaati",
|
| 231 |
+
"sharam", "sharaam",
|
| 232 |
+
"galat", "galt",
|
| 233 |
+
"jhanjhat", "jhamela",
|
| 234 |
+
"tang", "pareshan", "pareshaan",
|
| 235 |
+
"nirasha", "niraash", # disappointment
|
| 236 |
+
"thaka", "thakaan", # tired/exhausted
|
| 237 |
+
"dard", "peeda", # pain
|
| 238 |
+
"rona", "rota", "roti", # crying
|
| 239 |
+
"cheat", "cheating",
|
| 240 |
+
"fraud", "fraudiya",
|
| 241 |
+
"loot", "loota", "looting",
|
| 242 |
+
|
| 243 |
+
# ββ English negative ββ
|
| 244 |
+
"useless", "unfair", "disappointing", "disappointed",
|
| 245 |
+
"foolish", "stupid", "idiot", "idiotic",
|
| 246 |
+
"terrible", "horrible", "awful", "dreadful",
|
| 247 |
+
"worst", "worse", "bad", "poor",
|
| 248 |
+
"waste", "wasted", "pathetic",
|
| 249 |
+
"annoying", "annoyed", "irritating", "irritated",
|
| 250 |
+
"frustrating", "frustrated", "frustration",
|
| 251 |
+
"confusing", "confused", "confusion",
|
| 252 |
+
"misleading", "clickbait",
|
| 253 |
+
"fake", "scam", "spam",
|
| 254 |
+
"hate", "hated", "hating",
|
| 255 |
+
"angry", "anger", "rage",
|
| 256 |
+
"sad", "sadness", "unhappy", "upset",
|
| 257 |
+
"wrong", "incorrect", "error", "mistake",
|
| 258 |
+
"problem", "issue", "bug", "broken",
|
| 259 |
+
"slow", "lagging", "lag", "buffering",
|
| 260 |
+
"crash", "crashed", "crashing",
|
| 261 |
+
"fail", "failed", "failure",
|
| 262 |
+
"ignore", "ignored", "ignoring",
|
| 263 |
+
"rude", "disrespect", "disrespectful",
|
| 264 |
+
"unfair", "biased", "bias",
|
| 265 |
+
"overpriced", "expensive", "costly",
|
| 266 |
+
"wtf", "wth", "omg", # context-dependent but often negative in complaints
|
| 267 |
+
"curse", "abusive",
|
| 268 |
+
"liar", "lie", "lies",
|
| 269 |
+
"cheat", "cheater",
|
| 270 |
+
"regret", "regretted", "regrets",
|
| 271 |
+
"never", "worst",
|
| 272 |
+
|
| 273 |
+
# ββ Typo / abbreviation variants ββ
|
| 274 |
+
"bakwaaas", "bekarrr", "borinnng",
|
| 275 |
+
"worstttt", "terribleee",
|
| 276 |
+
|
| 277 |
+
# ββ Regional / South Indian Hinglish ββ
|
| 278 |
+
"kabaad", # junk/trash
|
| 279 |
+
"raddi", # waste/junk
|
| 280 |
+
"kachra", # garbage
|
| 281 |
+
"bekar", # useless (already above)
|
| 282 |
+
"nikamma", # good-for-nothing
|
| 283 |
+
"nalayak", # incompetent
|
| 284 |
+
"kamina", # scoundrel
|
| 285 |
+
"harami", # offensive negative
|
| 286 |
+
"bewakoof", # fool
|
| 287 |
+
"gadha", # donkey (fool)
|
| 288 |
+
"buddhu", # fool
|
| 289 |
+
"duffer", # dull/stupid
|
| 290 |
+
"flop", # flop/failure
|
| 291 |
+
"disaster", # disaster
|
| 292 |
+
"pathetic", # pathetic (already above)
|
| 293 |
+
"cringe", # cringe
|
| 294 |
+
"cap", # cap = lie (slang)
|
| 295 |
+
"mid", # mid = mediocre/bad (slang)
|
| 296 |
+
"trash", # trash
|
| 297 |
+
"garbage", # garbage
|
| 298 |
+
"dogwater", # very bad (gaming slang)
|
| 299 |
+
"lowkey bad",
|
| 300 |
+
"not good",
|
| 301 |
+
"not helpful",
|
| 302 |
+
"not worth",
|
| 303 |
+
"time waste",
|
| 304 |
+
"time wasted",
|
| 305 |
+
"waste of time",
|
| 306 |
+
}
|
| 307 |
|
|
|
|
| 308 |
|
| 309 |
+
# ββ Text normalisation βββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 310 |
|
| 311 |
+
def _normalise(text: str) -> str:
|
| 312 |
+
"""Lowercase, strip emoji codes, collapse repeated chars, collapse whitespace."""
|
| 313 |
+
# Strip demojized emoji codes like :fire: :thumbs_up:
|
| 314 |
+
t = re.sub(r":[a-z_]+:", " ", text)
|
| 315 |
+
t = t.lower()
|
| 316 |
+
# Collapse 3+ repeated chars to 2: "amazinggg" β "amazingg", "niceee" β "nicee"
|
| 317 |
+
# (keeps double so "woww" still matches "woww" in keyword set)
|
| 318 |
+
t = re.sub(r"(.)\1{2,}", r"\1\1", t)
|
| 319 |
+
t = re.sub(r"\s+", " ", t).strip()
|
| 320 |
+
return t[:512]
|
| 321 |
|
| 322 |
|
| 323 |
+
# ββ Core classification ββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
|
|
|
| 324 |
|
| 325 |
+
def _classify(text: str) -> tuple[str, float]:
    """
    Classify normalised text using keyword matching with negation and
    intensifier handling.

    Returns (label, base_confidence) before emoji adjustment, where label
    is one of "Positive", "Neutral", "Negative".

    Fixes over the previous version:
    - Multi-word keyword entries ("vera level", "not good", "waste of time",
      "paisa vasool", ...) could never fire because matching was strictly
      per-word; they are now matched as substrings of the normalised text.
    - Removed the unused `word_set` local.
    - Negation/intensifier lookups are only computed for words that are
      actually sentiment keywords, instead of for every token.
    """
    t = _normalise(text)

    # Too short to carry sentiment (e.g. "ok", "k", punctuation only).
    if len(t) <= 2:
        return "Neutral", 0.55

    word_list = t.split()

    pos_score = 0.0
    neg_score = 0.0
    pos_boost = 0.0
    neg_boost = 0.0

    for idx, word in enumerate(word_list):
        if word in _POS_WORDS:
            if _is_negated(word_list, idx):
                # "nahi accha" -> counts as negative
                neg_score += 1.0
                neg_boost = max(neg_boost, _intensifier_boost(word_list, idx))
            else:
                pos_score += 1.0
                pos_boost = max(pos_boost, _intensifier_boost(word_list, idx))
        elif word in _NEG_WORDS:
            if _is_negated(word_list, idx):
                # "boring nahi" -> counts as positive
                pos_score += 1.0
                pos_boost = max(pos_boost, _intensifier_boost(word_list, idx))
            else:
                neg_score += 1.0
                neg_boost = max(neg_boost, _intensifier_boost(word_list, idx))

    # Multi-word entries cannot match word-by-word; check them as
    # substrings of the whole normalised text. (Negation handling is not
    # applied to phrases — phrases like "not good" already encode it.)
    for phrase in _POS_WORDS:
        if " " in phrase and phrase in t:
            pos_score += 1.0
    for phrase in _NEG_WORDS:
        if " " in phrase and phrase in t:
            neg_score += 1.0

    # No keyword hits -> Neutral fallback
    if pos_score == 0 and neg_score == 0:
        return "Neutral", 0.55

    if pos_score > neg_score:
        base_conf = min(0.72 + 0.05 * pos_score + pos_boost, 0.92)
        return "Positive", round(base_conf, 3)

    if neg_score > pos_score:
        base_conf = min(0.72 + 0.05 * neg_score + neg_boost, 0.92)
        return "Negative", round(base_conf, 3)

    # Tie -> Neutral with moderate confidence
    return "Neutral", 0.58
|
|
|
|
| 379 |
|
|
|
|
| 380 |
|
| 381 |
+
# ββ Public API βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 382 |
|
| 383 |
+
def predict_sentiment(text: str) -> tuple[str, float]:
    """
    Classify a comment's sentiment.

    Parameters
    ----------
    text : str
        Raw comment text (may be Hinglish, emoji-containing, mixed script,
        or None).

    Returns
    -------
    label : str
        One of "Positive", "Neutral", "Negative".
    confidence : float
        Rule-based confidence in [0.50, 0.95].

    Notes
    -----
    - Deterministic: same input always produces the same output.
    - No ML models, no I/O, no side effects.
    - None and empty/whitespace-only strings return ("Neutral", 0.55).
    """
    # Guard: None, empty, or whitespace-only input.
    if not text or not text.strip():
        return "Neutral", 0.55

    label, base_conf = _classify(text)

    # Emoji sentiment (scored on the ORIGINAL text, before normalisation
    # strips emoji) nudges the keyword confidence up or down.
    adjusted = base_conf + _emoji_score(text)
    clamped = min(max(adjusted, 0.50), 0.95)
    return label, round(clamped, 3)
|
requirements.txt
CHANGED
|
@@ -1,13 +1,8 @@
|
|
| 1 |
-
# Core ML
|
| 2 |
-
torch>=2.0.0
|
| 3 |
-
transformers>=4.38.0
|
| 4 |
-
sentencepiece>=0.1.99
|
| 5 |
-
|
| 6 |
# Emoji + slang handling
|
| 7 |
emoji>=2.10.0
|
| 8 |
deep-translator>=1.11.4
|
| 9 |
|
| 10 |
-
# Live chat scraping (
|
| 11 |
|
| 12 |
# Dashboard
|
| 13 |
streamlit>=1.35.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
# Emoji + slang handling
|
| 2 |
emoji>=2.10.0
|
| 3 |
deep-translator>=1.11.4
|
| 4 |
|
| 5 |
+
# Live chat scraping (uses YouTube Data API v3 β no extra package needed)
|
| 6 |
|
| 7 |
# Dashboard
|
| 8 |
streamlit>=1.35.0
|