Update app.py
Browse files
app.py
CHANGED
|
@@ -242,7 +242,7 @@ if st.button("π Run Topic Modeling Pipeline", type="primary", use_container_w
|
|
| 242 |
if len(eval_metrics) > 0:
|
| 243 |
with st.spinner("Calculating mathematical metrics... (NPMI requires building a dictionary and takes a moment)"):
|
| 244 |
|
| 245 |
-
# Diversity
|
| 246 |
if "Topic Diversity" in eval_metrics and len(all_words) > 0:
|
| 247 |
u_words = set([w for t in all_words for w in t])
|
| 248 |
t_words = sum([len(t) for t in all_words])
|
|
@@ -250,7 +250,7 @@ if st.button("π Run Topic Modeling Pipeline", type="primary", use_container_w
|
|
| 250 |
div_val = float(len(u_words) / t_words) if t_words > 0 else 0.0
|
| 251 |
div_status = f"{div_val:.2f}"
|
| 252 |
|
| 253 |
-
# Coherence Models
|
| 254 |
if "NPMI Coherence" in eval_metrics or "UMass Coherence" in eval_metrics:
|
| 255 |
try:
|
| 256 |
tokenized = [vectorizer_model.build_analyzer()(s) for s in sentences]
|
|
@@ -260,7 +260,7 @@ if st.button("π Run Topic Modeling Pipeline", type="primary", use_container_w
|
|
| 260 |
cm_npmi = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='c_npmi')
|
| 261 |
temp_npmi = cm_npmi.get_coherence()
|
| 262 |
if np.isnan(temp_npmi):
|
| 263 |
-
npmi_status = "N/A"
|
| 264 |
else:
|
| 265 |
npmi_val = float(temp_npmi)
|
| 266 |
npmi_status = f"{npmi_val:.2f}"
|
|
@@ -269,15 +269,15 @@ if st.button("π Run Topic Modeling Pipeline", type="primary", use_container_w
|
|
| 269 |
cm_umass = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='u_mass')
|
| 270 |
temp_umass = cm_umass.get_coherence()
|
| 271 |
if np.isnan(temp_umass):
|
| 272 |
-
umass_status = "N/A"
|
| 273 |
else:
|
| 274 |
umass_val = float(temp_umass)
|
| 275 |
umass_status = f"{umass_val:.2f}"
|
| 276 |
except Exception:
|
| 277 |
-
npmi_status = "Skipped"
|
| 278 |
-
umass_status = "Skipped"
|
| 279 |
|
| 280 |
-
# Silhouette
|
| 281 |
if "Silhouette Score" in eval_metrics:
|
| 282 |
valid_idx = [i for i, t in enumerate(topics) if t != -1]
|
| 283 |
unique_topics = set([topics[i] for i in valid_idx])
|
|
@@ -289,7 +289,7 @@ if st.button("π Run Topic Modeling Pipeline", type="primary", use_container_w
|
|
| 289 |
))
|
| 290 |
sil_status = f"{sil_val:.2f}"
|
| 291 |
else:
|
| 292 |
-
sil_status = "Skipped"
|
| 293 |
|
| 294 |
# --- RENDER KPI DASHBOARD WITH TOOLTIPS ---
|
| 295 |
st.markdown("### π Key Performance Indicators (KPI)")
|
|
@@ -298,15 +298,29 @@ if st.button("π Run Topic Modeling Pipeline", type="primary", use_container_w
|
|
| 298 |
for idx, metric in enumerate(eval_metrics):
|
| 299 |
with kpi_cols[idx]:
|
| 300 |
if metric == "Topic Diversity":
|
| 301 |
-
st.metric(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 302 |
elif metric == "NPMI Coherence":
|
| 303 |
-
st.metric(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
elif metric == "UMass Coherence":
|
| 305 |
-
st.metric(
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
elif metric == "Silhouette Score":
|
| 307 |
-
st.metric(
|
| 308 |
-
|
| 309 |
-
|
|
|
|
|
|
|
| 310 |
# ==========================================
|
| 311 |
# 5. XAI VISUALIZATION GRAPH (With Live Math & Matrices)
|
| 312 |
# ==========================================
|
|
|
|
| 242 |
if len(eval_metrics) > 0:
|
| 243 |
with st.spinner("Calculating mathematical metrics... (NPMI requires building a dictionary and takes a moment)"):
|
| 244 |
|
| 245 |
+
# 1. Diversity
|
| 246 |
if "Topic Diversity" in eval_metrics and len(all_words) > 0:
|
| 247 |
u_words = set([w for t in all_words for w in t])
|
| 248 |
t_words = sum([len(t) for t in all_words])
|
|
|
|
| 250 |
div_val = float(len(u_words) / t_words) if t_words > 0 else 0.0
|
| 251 |
div_status = f"{div_val:.2f}"
|
| 252 |
|
| 253 |
+
# 2. Coherence Models (NPMI & UMass)
|
| 254 |
if "NPMI Coherence" in eval_metrics or "UMass Coherence" in eval_metrics:
|
| 255 |
try:
|
| 256 |
tokenized = [vectorizer_model.build_analyzer()(s) for s in sentences]
|
|
|
|
| 260 |
cm_npmi = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='c_npmi')
|
| 261 |
temp_npmi = cm_npmi.get_coherence()
|
| 262 |
if np.isnan(temp_npmi):
|
| 263 |
+
npmi_status = "N/A (Too few words)"
|
| 264 |
else:
|
| 265 |
npmi_val = float(temp_npmi)
|
| 266 |
npmi_status = f"{npmi_val:.2f}"
|
|
|
|
| 269 |
cm_umass = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='u_mass')
|
| 270 |
temp_umass = cm_umass.get_coherence()
|
| 271 |
if np.isnan(temp_umass):
|
| 272 |
+
umass_status = "N/A (Too few words)"
|
| 273 |
else:
|
| 274 |
umass_val = float(temp_umass)
|
| 275 |
umass_status = f"{umass_val:.2f}"
|
| 276 |
except Exception:
|
| 277 |
+
npmi_status = "Skipped (Data too small)"
|
| 278 |
+
umass_status = "Skipped (Data too small)"
|
| 279 |
|
| 280 |
+
# 3. Silhouette Score
|
| 281 |
if "Silhouette Score" in eval_metrics:
|
| 282 |
valid_idx = [i for i, t in enumerate(topics) if t != -1]
|
| 283 |
unique_topics = set([topics[i] for i in valid_idx])
|
|
|
|
| 289 |
))
|
| 290 |
sil_status = f"{sil_val:.2f}"
|
| 291 |
else:
|
| 292 |
+
sil_status = "Skipped (Themes need ≥2 sentences)"
|
| 293 |
|
| 294 |
# --- RENDER KPI DASHBOARD WITH TOOLTIPS ---
|
| 295 |
st.markdown("### π Key Performance Indicators (KPI)")
|
|
|
|
| 298 |
for idx, metric in enumerate(eval_metrics):
|
| 299 |
with kpi_cols[idx]:
|
| 300 |
if metric == "Topic Diversity":
|
| 301 |
+
st.metric(
|
| 302 |
+
label="Topic Diversity",
|
| 303 |
+
value=div_status,
|
| 304 |
+
help="Math: Unique Words / Total Words.\nTarget: 1.0 (No redundant words across themes)."
|
| 305 |
+
)
|
| 306 |
elif metric == "NPMI Coherence":
|
| 307 |
+
st.metric(
|
| 308 |
+
label="NPMI Coherence",
|
| 309 |
+
value=npmi_status,
|
| 310 |
+
help="Math: Normalized Pointwise Mutual Information.\nCalculates joint probability of words existing together.\nTarget: >0.1"
|
| 311 |
+
)
|
| 312 |
elif metric == "UMass Coherence":
|
| 313 |
+
st.metric(
|
| 314 |
+
label="UMass Coherence",
|
| 315 |
+
value=umass_status,
|
| 316 |
+
help="Math: Internal log-conditional probability.\nEvaluates if words co-occur strictly inside your uploaded dataset.\nTarget: Closer to 0."
|
| 317 |
+
)
|
| 318 |
elif metric == "Silhouette Score":
|
| 319 |
+
st.metric(
|
| 320 |
+
label="Silhouette Score",
|
| 321 |
+
value=sil_status,
|
| 322 |
+
help="Math: (b - a) / max(a,b).\nMeasures intra-cluster density (a) vs nearest-cluster distance (b).\nTarget: >0.0"
|
| 323 |
+
)
|
| 324 |
# ==========================================
|
| 325 |
# 5. XAI VISUALIZATION GRAPH (With Live Math & Matrices)
|
| 326 |
# ==========================================
|