inspect_web_clusters

Running

loubnabnl HF staff committed on Jan 23

Commit

85a8c20

•

1 Parent(s): f2ed9e8

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -15,20 +15,32 @@ Our approach involved prompting Mixtral to evaluate whether the topics in each c
 Additionally, the model was tasked with finding the topic of each cluster.
 """)
 @st.cache_data
-def load_data(min_score=1, max_score=10):
-    # Load the cluster dataset and keep only the rows whose educational_score,
-    # cast to int, lies in the inclusive range [min_score, max_score].
-    # int() raises on a value that is not numeric (e.g. None or ''), so this
-    # version cannot handle rows without a usable score.
     ds = load_dataset("HuggingFaceTB/FW_clusters_free_topics", split="train", token=HF_TOKEN, num_proc=2)
-    ds = ds.filter(lambda x: int(x['educational_score']) <= max_score and int(x['educational_score']) >= min_score)
     return ds
 st.subheader("Cluster information")
-# Two columns, one per slider: lower and upper bound of the score range.
-col_1, col_2 = st.columns(2)
 with col_1:
     min_value = st.slider('Select minimum educational score', 1, 10, 1, key='min_score')
 with col_2:
     max_value = st.slider('Select maximum educational score', 1, 10, 10, key='max_score')
-# Load the dataset restricted to the score range chosen with the sliders.
-ds = load_data(min_value, max_value)
 selected_category_type = st.selectbox("Select a topic", categories)
 categories = list(set(ds["category"]))
 selected_cluster = ds.filter(lambda x: x['category'] == selected_category)

 Additionally, the model was tasked with finding the topic of each cluster.
 """)
 @st.cache_data
+def load_data(min_score=1, max_score=10, show_special=False):
+    """Load the cluster dataset filtered by educational score.
+
+    Args:
+        min_score: Lowest educational score to keep (inclusive).
+        max_score: Highest educational score to keep (inclusive).
+        show_special: When True, also keep rows whose educational_score
+            cannot be converted to an int (e.g. None or '').
+
+    Returns:
+        The dataset restricted to the rows that pass the filter.
+    """
     ds = load_dataset("HuggingFaceTB/FW_clusters_free_topics", split="train", token=HF_TOKEN, num_proc=2)
+    def filter_func(x):
+        # Keep the try body to the one conversion that can actually fail.
+        try:
+            score = int(x['educational_score'])
+        except (ValueError, TypeError):
+            # Undefined score (None, '' or otherwise non-numeric): keep the
+            # row only when the checkbox asks for these special clusters.
+            return show_special
+        # A numeric score is always subject to the selected range; the
+        # checkbox must not override the sliders for scored clusters.
+        return min_score <= score <= max_score
+    ds = ds.filter(filter_func)
     return ds
 st.subheader("Cluster information")
+# Three columns: the two score sliders plus the undefined-score checkbox.
+# st.columns(n) yields exactly n containers, so n must match the number of
+# names unpacked here (unpacking three names from two raises ValueError).
+col_1, col_2, col_3 = st.columns(3)
 with col_1:
     min_value = st.slider('Select minimum educational score', 1, 10, 1, key='min_score')
 with col_2:
     max_value = st.slider('Select maximum educational score', 1, 10, 10, key='max_score')
+with col_3:
+    show_special = st.checkbox('Show clusters with undefined educational score', False)
+# Load data based on slider values and checkbox status
+ds = load_data(min_value, max_value, show_special)
 selected_category_type = st.selectbox("Select a topic", categories)
 categories = list(set(ds["category"]))
 selected_cluster = ds.filter(lambda x: x['category'] == selected_category)