meg-huggingface committed on
Commit
df659f9
2 Parent(s): cda45dd b7bd3e0

Merge branch 'main' of https://huggingface.co/spaces/huggingface/data-measurements-tool-2 into main

Browse files
Files changed (1) hide show
  1. data_measurements/streamlit_utils.py +21 -18
data_measurements/streamlit_utils.py CHANGED
@@ -251,24 +251,27 @@ def expander_text_embeddings(
251
  else:
252
  show_node_default = len(node_list) // 2
253
  st.markdown("---\n")
254
- show_node = st.selectbox(
255
- f"Choose a leaf node to explore in the{column_id} dataset:",
256
- range(len(node_list)),
257
- index=show_node_default,
258
- )
259
- node = node_list[show_node]
260
- start_id = st.slider(
261
- f"Show closest sentences in cluster to the centroid{column_id} starting at index:",
262
- 0,
263
- len(node["sorted_examples_centroid"]) - 5,
264
- value=0,
265
- step=5,
266
- )
267
- for sid, sim in node["sorted_examples_centroid"][start_id : start_id + 5]:
268
- # only show the first 4 lines and the first 10000 characters
269
- show_text = text_dset[sid][text_field][:10000]
270
- show_text = "\n".join(show_text.split("\n")[:4])
271
- st.text(f"{sim:.3f} \t {show_text}")
 
 
 
272
 
273
 
274
  ### Then, show duplicates
 
251
  else:
252
  show_node_default = len(node_list) // 2
253
  st.markdown("---\n")
254
+ if text_dset is None:
255
+ st.markdown("Missing source text to show, check back later!")
256
+ else:
257
+ show_node = st.selectbox(
258
+ f"Choose a leaf node to explore in the{column_id} dataset:",
259
+ range(len(node_list)),
260
+ index=show_node_default,
261
+ )
262
+ node = node_list[show_node]
263
+ start_id = st.slider(
264
+ f"Show closest sentences in cluster to the centroid{column_id} starting at index:",
265
+ 0,
266
+ len(node["sorted_examples_centroid"]) - 5,
267
+ value=0,
268
+ step=5,
269
+ )
270
+ for sid, sim in node["sorted_examples_centroid"][start_id : start_id + 5]:
271
+ # only show the first 4 lines and the first 10000 characters
272
+ show_text = text_dset[sid][text_field][:10000]
273
+ show_text = "\n".join(show_text.split("\n")[:4])
274
+ st.text(f"{sim:.3f} \t {show_text}")
275
 
276
 
277
  ### Then, show duplicates