Yacine Jernite committed on
Commit
b7bd3e0
1 Parent(s): 06262b2

catch missing text_dset

Browse files
Files changed (1) hide show
  1. data_measurements/streamlit_utils.py +21 -18
data_measurements/streamlit_utils.py CHANGED
@@ -248,24 +248,27 @@ def expander_text_embeddings(
248
  else:
249
  show_node_default = len(node_list) // 2
250
  st.markdown("---\n")
251
- show_node = st.selectbox(
252
- f"Choose a leaf node to explore in the{column_id} dataset:",
253
- range(len(node_list)),
254
- index=show_node_default,
255
- )
256
- node = node_list[show_node]
257
- start_id = st.slider(
258
- f"Show closest sentences in cluster to the centroid{column_id} starting at index:",
259
- 0,
260
- len(node["sorted_examples_centroid"]) - 5,
261
- value=0,
262
- step=5,
263
- )
264
- for sid, sim in node["sorted_examples_centroid"][start_id : start_id + 5]:
265
- # only show the first 4 lines and the first 10000 characters
266
- show_text = text_dset[sid][text_field][:10000]
267
- show_text = "\n".join(show_text.split("\n")[:4])
268
- st.text(f"{sim:.3f} \t {show_text}")
 
 
 
269
 
270
 
271
  ### Then, show duplicates
 
248
  else:
249
  show_node_default = len(node_list) // 2
250
  st.markdown("---\n")
251
+ if text_dset is None:
252
+ st.markdown("Missing source text to show, check back later!")
253
+ else:
254
+ show_node = st.selectbox(
255
+ f"Choose a leaf node to explore in the{column_id} dataset:",
256
+ range(len(node_list)),
257
+ index=show_node_default,
258
+ )
259
+ node = node_list[show_node]
260
+ start_id = st.slider(
261
+ f"Show closest sentences in cluster to the centroid{column_id} starting at index:",
262
+ 0,
263
+ len(node["sorted_examples_centroid"]) - 5,
264
+ value=0,
265
+ step=5,
266
+ )
267
+ for sid, sim in node["sorted_examples_centroid"][start_id : start_id + 5]:
268
+ # only show the first 4 lines and the first 10000 characters
269
+ show_text = text_dset[sid][text_field][:10000]
270
+ show_text = "\n".join(show_text.split("\n")[:4])
271
+ st.text(f"{sim:.3f} \t {show_text}")
272
 
273
 
274
  ### Then, show duplicates