Spaces:
Runtime error
Runtime error
HugoLaurencon
committed on
Commit
•
649ea6a
1
Parent(s):
8f0da78
add warning message
Browse files
- app.py +13 -1
- explanation_filtering_pipeline.pdf +0 -0
app.py
CHANGED
@@ -54,6 +54,15 @@ class Visualization:
|
|
54 |
lang_dataset_id, path_kenlm_model
|
55 |
)
|
56 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
57 |
def preamble(self):
|
58 |
st.markdown(
|
59 |
"Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail."
|
@@ -170,7 +179,9 @@ class Visualization:
|
|
170 |
)
|
171 |
self.docs = self.docs_checkpoint
|
172 |
for i in range(len(self.docs["repetitions_ratio"])):
|
173 |
-
self.docs["repetitions_ratio"].iloc[i] = self.docs[
|
|
|
|
|
174 |
|
175 |
cutoff_def = "If the repetitions ratio of a document is higher than this number, the document is removed."
|
176 |
cutoff_repetitions_ratio = st.sidebar.slider(
|
@@ -526,6 +537,7 @@ class Visualization:
|
|
526 |
)
|
527 |
|
528 |
def visualization(self):
|
|
|
529 |
self.preamble()
|
530 |
self.open_data()
|
531 |
self.set_title()
|
|
|
54 |
lang_dataset_id, path_kenlm_model
|
55 |
)
|
56 |
|
57 |
+
def warning_preamble(self):
|
58 |
+
st.markdown(
|
59 |
+
"This demo can be a little slow, and only allows you to process up to 5000 documents "
|
60 |
+
"for a decent speed. If you want to display up to three times more documents and have "
|
61 |
+
"a faster visualization, we invite you to run this "
|
62 |
+
"[code](https://github.com/bigscience-workshop/data_tooling/tree/master/ac_dc/visualization) "
|
63 |
+
"on your computer."
|
64 |
+
)
|
65 |
+
|
66 |
def preamble(self):
|
67 |
st.markdown(
|
68 |
"Before diving into this demo, you might want to take a look at how the filtering pipeline looks like in more detail."
|
|
|
179 |
)
|
180 |
self.docs = self.docs_checkpoint
|
181 |
for i in range(len(self.docs["repetitions_ratio"])):
|
182 |
+
self.docs["repetitions_ratio"].iloc[i] = self.docs[
|
183 |
+
"repetitions_ratio"
|
184 |
+
].iloc[i][repetitions_length]
|
185 |
|
186 |
cutoff_def = "If the repetitions ratio of a document is higher than this number, the document is removed."
|
187 |
cutoff_repetitions_ratio = st.sidebar.slider(
|
|
|
537 |
)
|
538 |
|
539 |
def visualization(self):
|
540 |
+
self.warning_preamble()
|
541 |
self.preamble()
|
542 |
self.open_data()
|
543 |
self.set_title()
|
explanation_filtering_pipeline.pdf
CHANGED
Binary files a/explanation_filtering_pipeline.pdf and b/explanation_filtering_pipeline.pdf differ
|
|