gtech13 committed on
Commit
b53e4bf
·
verified ·
1 Parent(s): 2d77f14

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -14
app.py CHANGED
@@ -242,7 +242,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
242
  if len(eval_metrics) > 0:
243
  with st.spinner("Calculating mathematical metrics... (NPMI requires building a dictionary and takes a moment)"):
244
 
245
- # Diversity
246
  if "Topic Diversity" in eval_metrics and len(all_words) > 0:
247
  u_words = set([w for t in all_words for w in t])
248
  t_words = sum([len(t) for t in all_words])
@@ -250,7 +250,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
250
  div_val = float(len(u_words) / t_words) if t_words > 0 else 0.0
251
  div_status = f"{div_val:.2f}"
252
 
253
- # Coherence Models
254
  if "NPMI Coherence" in eval_metrics or "UMass Coherence" in eval_metrics:
255
  try:
256
  tokenized = [vectorizer_model.build_analyzer()(s) for s in sentences]
@@ -260,7 +260,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
260
  cm_npmi = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='c_npmi')
261
  temp_npmi = cm_npmi.get_coherence()
262
  if np.isnan(temp_npmi):
263
- npmi_status = "N/A"
264
  else:
265
  npmi_val = float(temp_npmi)
266
  npmi_status = f"{npmi_val:.2f}"
@@ -269,15 +269,15 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
269
  cm_umass = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='u_mass')
270
  temp_umass = cm_umass.get_coherence()
271
  if np.isnan(temp_umass):
272
- umass_status = "N/A"
273
  else:
274
  umass_val = float(temp_umass)
275
  umass_status = f"{umass_val:.2f}"
276
  except Exception:
277
- npmi_status = "Skipped"
278
- umass_status = "Skipped"
279
 
280
- # Silhouette
281
  if "Silhouette Score" in eval_metrics:
282
  valid_idx = [i for i, t in enumerate(topics) if t != -1]
283
  unique_topics = set([topics[i] for i in valid_idx])
@@ -289,7 +289,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
289
  ))
290
  sil_status = f"{sil_val:.2f}"
291
  else:
292
- sil_status = "Skipped"
293
 
294
  # --- RENDER KPI DASHBOARD WITH TOOLTIPS ---
295
  st.markdown("### 📊 Key Performance Indicators (KPI)")
@@ -298,15 +298,29 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
298
  for idx, metric in enumerate(eval_metrics):
299
  with kpi_cols[idx]:
300
  if metric == "Topic Diversity":
301
- st.metric("Topic Diversity", div_status, help="Math: Unique Words / Total Words.\nTarget: 1.0 (No redundant words across themes).")
 
 
 
 
302
  elif metric == "NPMI Coherence":
303
- st.metric("NPMI Coherence", npmi_status, help="Math: Normalized Pointwise Mutual Information.\nCalculates joint probability of words existing together.\nTarget: >0.1")
 
 
 
 
304
  elif metric == "UMass Coherence":
305
- st.metric("UMass Coherence", umass_status, help="Math: Internal log-conditional probability.\nEvaluates if words co-occur strictly inside your uploaded dataset.\nTarget: Closer to 0.")
 
 
 
 
306
  elif metric == "Silhouette Score":
307
- st.metric("Silhouette Score", sil_status, help="Math: (b - a) / max(a,b).\nMeasures intra-cluster density (a) vs nearest-cluster distance (b).\nTarget: >0.0")
308
-
309
-
 
 
310
  # ==========================================
311
  # 5. XAI VISUALIZATION GRAPH (With Live Math & Matrices)
312
  # ==========================================
 
242
  if len(eval_metrics) > 0:
243
  with st.spinner("Calculating mathematical metrics... (NPMI requires building a dictionary and takes a moment)"):
244
 
245
+ # 1. Diversity
246
  if "Topic Diversity" in eval_metrics and len(all_words) > 0:
247
  u_words = set([w for t in all_words for w in t])
248
  t_words = sum([len(t) for t in all_words])
 
250
  div_val = float(len(u_words) / t_words) if t_words > 0 else 0.0
251
  div_status = f"{div_val:.2f}"
252
 
253
+ # 2. Coherence Models (NPMI & UMass)
254
  if "NPMI Coherence" in eval_metrics or "UMass Coherence" in eval_metrics:
255
  try:
256
  tokenized = [vectorizer_model.build_analyzer()(s) for s in sentences]
 
260
  cm_npmi = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='c_npmi')
261
  temp_npmi = cm_npmi.get_coherence()
262
  if np.isnan(temp_npmi):
263
+ npmi_status = "N/A (Too few words)"
264
  else:
265
  npmi_val = float(temp_npmi)
266
  npmi_status = f"{npmi_val:.2f}"
 
269
  cm_umass = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='u_mass')
270
  temp_umass = cm_umass.get_coherence()
271
  if np.isnan(temp_umass):
272
+ umass_status = "N/A (Too few words)"
273
  else:
274
  umass_val = float(temp_umass)
275
  umass_status = f"{umass_val:.2f}"
276
  except Exception:
277
+ npmi_status = "Skipped (Data too small)"
278
+ umass_status = "Skipped (Data too small)"
279
 
280
+ # 3. Silhouette Score
281
  if "Silhouette Score" in eval_metrics:
282
  valid_idx = [i for i, t in enumerate(topics) if t != -1]
283
  unique_topics = set([topics[i] for i in valid_idx])
 
289
  ))
290
  sil_status = f"{sil_val:.2f}"
291
  else:
292
+ sil_status = "Skipped (Themes need ≥2 sentences)"
293
 
294
  # --- RENDER KPI DASHBOARD WITH TOOLTIPS ---
295
  st.markdown("### 📊 Key Performance Indicators (KPI)")
 
298
  for idx, metric in enumerate(eval_metrics):
299
  with kpi_cols[idx]:
300
  if metric == "Topic Diversity":
301
+ st.metric(
302
+ label="Topic Diversity",
303
+ value=div_status,
304
+ help="Math: Unique Words / Total Words.\nTarget: 1.0 (No redundant words across themes)."
305
+ )
306
  elif metric == "NPMI Coherence":
307
+ st.metric(
308
+ label="NPMI Coherence",
309
+ value=npmi_status,
310
+ help="Math: Normalized Pointwise Mutual Information.\nCalculates joint probability of words existing together.\nTarget: >0.1"
311
+ )
312
  elif metric == "UMass Coherence":
313
+ st.metric(
314
+ label="UMass Coherence",
315
+ value=umass_status,
316
+ help="Math: Internal log-conditional probability.\nEvaluates if words co-occur strictly inside your uploaded dataset.\nTarget: Closer to 0."
317
+ )
318
  elif metric == "Silhouette Score":
319
+ st.metric(
320
+ label="Silhouette Score",
321
+ value=sil_status,
322
+ help="Math: (b - a) / max(a,b).\nMeasures intra-cluster density (a) vs nearest-cluster distance (b).\nTarget: >0.0"
323
+ )
324
  # ==========================================
325
  # 5. XAI VISUALIZATION GRAPH (With Live Math & Matrices)
326
  # ==========================================