Spaces:

gtech13
/

Topic-Modeling-BERTopic-Math-Visualization

Sleeping

App Files Community

gtech13 committed 28 days ago

Commit

b53e4bf

verified ·

1 Parent(s): 2d77f14

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -14

app.py CHANGED Viewed

@@ -242,7 +242,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
     if len(eval_metrics) > 0:
         with st.spinner("Calculating mathematical metrics... (NPMI requires building a dictionary and takes a moment)"):
-            # Diversity
             if "Topic Diversity" in eval_metrics and len(all_words) > 0:
                 u_words = set([w for t in all_words for w in t])
                 t_words = sum([len(t) for t in all_words])
@@ -250,7 +250,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
                 div_val = float(len(u_words) / t_words) if t_words > 0 else 0.0
                 div_status = f"{div_val:.2f}"
-            # Coherence Models
             if "NPMI Coherence" in eval_metrics or "UMass Coherence" in eval_metrics:
                 try:
                     tokenized = [vectorizer_model.build_analyzer()(s) for s in sentences]
@@ -260,7 +260,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
                         cm_npmi = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='c_npmi')
                         temp_npmi = cm_npmi.get_coherence()
                         if np.isnan(temp_npmi):
-                            npmi_status = "N/A"
                         else:
                             npmi_val = float(temp_npmi)
                             npmi_status = f"{npmi_val:.2f}"
@@ -269,15 +269,15 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
                         cm_umass = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='u_mass')
                         temp_umass = cm_umass.get_coherence()
                         if np.isnan(temp_umass):
-                            umass_status = "N/A"
                         else:
                             umass_val = float(temp_umass)
                             umass_status = f"{umass_val:.2f}"
                 except Exception:
-                    npmi_status = "Skipped"
-                    umass_status = "Skipped"
-            # Silhouette
             if "Silhouette Score" in eval_metrics:
                 valid_idx = [i for i, t in enumerate(topics) if t != -1]
                 unique_topics = set([topics[i] for i in valid_idx])
@@ -289,7 +289,7 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
                     ))
                     sil_status = f"{sil_val:.2f}"
                 else:
-                    sil_status = "Skipped"
         # --- RENDER KPI DASHBOARD WITH TOOLTIPS ---
         st.markdown("### 📊 Key Performance Indicators (KPI)")
@@ -298,15 +298,29 @@ if st.button("🚀 Run Topic Modeling Pipeline", type="primary", use_container_w
         for idx, metric in enumerate(eval_metrics):
             with kpi_cols[idx]:
                 if metric == "Topic Diversity":
-                    st.metric("Topic Diversity", div_status, help="Math: Unique Words / Total Words.\nTarget: 1.0 (No redundant words across themes).")
                 elif metric == "NPMI Coherence":
-                    st.metric("NPMI Coherence", npmi_status, help="Math: Normalized Pointwise Mutual Information.\nCalculates joint probability of words existing together.\nTarget: >0.1")
                 elif metric == "UMass Coherence":
-                    st.metric("UMass Coherence", umass_status, help="Math: Internal log-conditional probability.\nEvaluates if words co-occur strictly inside your uploaded dataset.\nTarget: Closer to 0.")
                 elif metric == "Silhouette Score":
-                    st.metric("Silhouette Score", sil_status, help="Math: (b - a) / max(a,b).\nMeasures intra-cluster density (a) vs nearest-cluster distance (b).\nTarget: >0.0")
 # ==========================================
     # 5. XAI VISUALIZATION GRAPH (With Live Math & Matrices)
     # ==========================================

     if len(eval_metrics) > 0:
         with st.spinner("Calculating mathematical metrics... (NPMI requires building a dictionary and takes a moment)"):
+            # 1. Diversity
             if "Topic Diversity" in eval_metrics and len(all_words) > 0:
                 u_words = set([w for t in all_words for w in t])
                 t_words = sum([len(t) for t in all_words])
                 div_val = float(len(u_words) / t_words) if t_words > 0 else 0.0
                 div_status = f"{div_val:.2f}"
+            # 2. Coherence Models (NPMI & UMass)
             if "NPMI Coherence" in eval_metrics or "UMass Coherence" in eval_metrics:
                 try:
                     tokenized = [vectorizer_model.build_analyzer()(s) for s in sentences]
                         cm_npmi = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='c_npmi')
                         temp_npmi = cm_npmi.get_coherence()
                         if np.isnan(temp_npmi):
+                            npmi_status = "N/A (Too few words)"
                         else:
                             npmi_val = float(temp_npmi)
                             npmi_status = f"{npmi_val:.2f}"
                         cm_umass = CoherenceModel(topics=all_words, texts=tokenized, dictionary=dictionary, coherence='u_mass')
                         temp_umass = cm_umass.get_coherence()
                         if np.isnan(temp_umass):
+                            umass_status = "N/A (Too few words)"
                         else:
                             umass_val = float(temp_umass)
                             umass_status = f"{umass_val:.2f}"
                 except Exception:
+                    npmi_status = "Skipped (Data too small)"
+                    umass_status = "Skipped (Data too small)"
+            # 3. Silhouette Score
             if "Silhouette Score" in eval_metrics:
                 valid_idx = [i for i, t in enumerate(topics) if t != -1]
                 unique_topics = set([topics[i] for i in valid_idx])
                     ))
                     sil_status = f"{sil_val:.2f}"
                 else:
+                    sil_status = "Skipped (Themes need ≥2 sentences)"
         # --- RENDER KPI DASHBOARD WITH TOOLTIPS ---
         st.markdown("### 📊 Key Performance Indicators (KPI)")
         for idx, metric in enumerate(eval_metrics):
             with kpi_cols[idx]:
                 if metric == "Topic Diversity":
+                    st.metric(
+                        label="Topic Diversity",
+                        value=div_status,
+                        help="Math: Unique Words / Total Words.\nTarget: 1.0 (No redundant words across themes)."
+                    )
                 elif metric == "NPMI Coherence":
+                    st.metric(
+                        label="NPMI Coherence",
+                        value=npmi_status,
+                        help="Math: Normalized Pointwise Mutual Information.\nCalculates joint probability of words existing together.\nTarget: >0.1"
+                    )
                 elif metric == "UMass Coherence":
+                    st.metric(
+                        label="UMass Coherence",
+                        value=umass_status,
+                        help="Math: Internal log-conditional probability.\nEvaluates if words co-occur strictly inside your uploaded dataset.\nTarget: Closer to 0."
+                    )
                 elif metric == "Silhouette Score":
+                    st.metric(
+                        label="Silhouette Score",
+                        value=sil_status,
+                        help="Math: (b - a) / max(a,b).\nMeasures intra-cluster density (a) vs nearest-cluster distance (b).\nTarget: >0.0"
+                    )
 # ==========================================
     # 5. XAI VISUALIZATION GRAPH (With Live Math & Matrices)
     # ==========================================