HugoLaurencon committed on
Commit
649ea6a
1 Parent(s): 8f0da78

add warning message

Browse files
Files changed (2) hide show
  1. app.py +13 -1
  2. explanation_filtering_pipeline.pdf +0 -0
app.py CHANGED
@@ -54,6 +54,15 @@ class Visualization:
54
  lang_dataset_id, path_kenlm_model
55
  )
56
 
 
 
 
 
 
 
 
 
 
57
  def preamble(self):
58
  st.markdown(
59
  "Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail."
@@ -170,7 +179,9 @@ class Visualization:
170
  )
171
  self.docs = self.docs_checkpoint
172
  for i in range(len(self.docs["repetitions_ratio"])):
173
- self.docs["repetitions_ratio"].iloc[i] = self.docs["repetitions_ratio"].iloc[i][repetitions_length]
 
 
174
 
175
  cutoff_def = "If the repetitions ratio of a document is higher than this number, the document is removed."
176
  cutoff_repetitions_ratio = st.sidebar.slider(
@@ -526,6 +537,7 @@ class Visualization:
526
  )
527
 
528
  def visualization(self):
 
529
  self.preamble()
530
  self.open_data()
531
  self.set_title()
 
54
  lang_dataset_id, path_kenlm_model
55
  )
56
 
57
+ def warning_preamble(self):
58
+ st.markdown(
59
+ "This demo can be a little slow, and only allows you to process up to 5000 documents "
60
+ "for a decent speed. If you want to display up to three times more documents and have "
61
+ "a faster visualization, we invite you to run this "
62
+ "[code](https://github.com/bigscience-workshop/data_tooling/tree/master/ac_dc/visualization) "
63
+ "on your computer."
64
+ )
65
+
66
  def preamble(self):
67
  st.markdown(
68
  "Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail."
 
179
  )
180
  self.docs = self.docs_checkpoint
181
  for i in range(len(self.docs["repetitions_ratio"])):
182
+ self.docs["repetitions_ratio"].iloc[i] = self.docs[
183
+ "repetitions_ratio"
184
+ ].iloc[i][repetitions_length]
185
 
186
  cutoff_def = "If the repetitions ratio of a document is higher than this number, the document is removed."
187
  cutoff_repetitions_ratio = st.sidebar.slider(
 
537
  )
538
 
539
  def visualization(self):
540
+ self.warning_preamble()
541
  self.preamble()
542
  self.open_data()
543
  self.set_title()
explanation_filtering_pipeline.pdf CHANGED
Binary files a/explanation_filtering_pipeline.pdf and b/explanation_filtering_pipeline.pdf differ