HugoLaurencon HF staff committed on
Commit
fa81556
1 Parent(s): 0610f9d

visualization: small step for the slider on flagged words ratio

Browse files
Files changed (1) hide show
  1. app.py +41 -15
app.py CHANGED
@@ -282,9 +282,16 @@ class Visualization_for_lang:
282
  new_flagged_words,
283
  )
284
  cutoff_def = "If the flagged words ratio of a document is higher than this number, the document is removed."
285
- max_fwr = float(np.max(self.docs["flagged_words_ratio"]))
 
 
286
  cutoff_flagged_words_ratio = st.slider(
287
- cutoff_def, 0.0, max_fwr, max_fwr, step=0.001
 
 
 
 
 
288
  )
289
  new_key = ("flagged_words_ratio", cutoff_flagged_words_ratio, True)
290
  keys.append(new_key)
@@ -481,7 +488,9 @@ class Visualization_for_lang:
481
  for i in range(len(self.words["incorrect_substrings"]))
482
  ]
483
  )
484
- Visualization_for_lang.print_discarded_by_cond(cond_incorrect_substrings)
 
 
485
  conds_words["incorrect_substrings"] = cond_incorrect_substrings
486
 
487
  all_conds_words = np.all(list(conds_words.values()), axis=0)
@@ -723,7 +732,10 @@ class Visualization:
723
  )
724
 
725
  def choose_lang(self):
726
- options = [self.param_visu_langs[lang_dataset_id]["lang"] for lang_dataset_id in self.param_visu_langs]
 
 
 
727
  index = options.index("English") if ("English" in options) else 0
728
  lang_chosen = st.selectbox(
729
  label="Select the language for visualization",
@@ -731,23 +743,37 @@ class Visualization:
731
  index=index,
732
  )
733
  if lang_chosen != "None":
734
- lang_chosen_dataset_id = langs_id.loc[langs_id["lang"] == lang_chosen, "dataset_id"].iloc[0]
 
 
735
  visualization_for_lang = Visualization_for_lang(
736
- path_data = self.param_visu_langs[lang_chosen_dataset_id]["path_data"],
737
- lang = self.param_visu_langs[lang_chosen_dataset_id]["lang"],
738
- num_docs = self.param_visu_langs[lang_chosen_dataset_id]["num_docs"],
739
- num_docs_for_words = self.param_visu_langs[lang_chosen_dataset_id]["num_docs_for_words"],
740
- max_len_text_display = self.param_visu_langs[lang_chosen_dataset_id]["max_len_text_display"],
741
- lang_dataset_id = self.param_visu_langs[lang_chosen_dataset_id]["lang_dataset_id"],
742
- path_fasttext_model = self.param_visu_langs[lang_chosen_dataset_id]["path_fasttext_model"],
743
- path_sentencepiece_model = self.param_visu_langs[lang_chosen_dataset_id]["path_sentencepiece_model"],
744
- path_kenlm_model = self.param_visu_langs[lang_chosen_dataset_id]["path_kenlm_model"],
 
 
 
 
 
 
 
 
 
 
 
 
745
  )
746
  visualization_for_lang.visualization_for_lang()
747
 
748
  def visualization(self):
749
  self.preamble()
750
- self.warning_preamble()
751
  self.choose_lang()
752
 
753
 
 
282
  new_flagged_words,
283
  )
284
  cutoff_def = "If the flagged words ratio of a document is higher than this number, the document is removed."
285
+ max_fwr = np.max(self.docs["flagged_words_ratio"])
286
+ max_fwr = np.ceil(max_fwr * 1000) / 1000
287
+ max_fwr = float(max_fwr)
288
  cutoff_flagged_words_ratio = st.slider(
289
+ cutoff_def,
290
+ 0.000,
291
+ max_fwr,
292
+ max_fwr,
293
+ step=0.001,
294
+ format="%f",
295
  )
296
  new_key = ("flagged_words_ratio", cutoff_flagged_words_ratio, True)
297
  keys.append(new_key)
 
488
  for i in range(len(self.words["incorrect_substrings"]))
489
  ]
490
  )
491
+ Visualization_for_lang.print_discarded_by_cond(
492
+ cond_incorrect_substrings
493
+ )
494
  conds_words["incorrect_substrings"] = cond_incorrect_substrings
495
 
496
  all_conds_words = np.all(list(conds_words.values()), axis=0)
 
732
  )
733
 
734
  def choose_lang(self):
735
+ options = [
736
+ self.param_visu_langs[lang_dataset_id]["lang"]
737
+ for lang_dataset_id in self.param_visu_langs
738
+ ]
739
  index = options.index("English") if ("English" in options) else 0
740
  lang_chosen = st.selectbox(
741
  label="Select the language for visualization",
 
743
  index=index,
744
  )
745
  if lang_chosen != "None":
746
+ lang_chosen_dataset_id = langs_id.loc[
747
+ langs_id["lang"] == lang_chosen, "dataset_id"
748
+ ].iloc[0]
749
  visualization_for_lang = Visualization_for_lang(
750
+ path_data=self.param_visu_langs[lang_chosen_dataset_id]["path_data"],
751
+ lang=self.param_visu_langs[lang_chosen_dataset_id]["lang"],
752
+ num_docs=self.param_visu_langs[lang_chosen_dataset_id]["num_docs"],
753
+ num_docs_for_words=self.param_visu_langs[lang_chosen_dataset_id][
754
+ "num_docs_for_words"
755
+ ],
756
+ max_len_text_display=self.param_visu_langs[lang_chosen_dataset_id][
757
+ "max_len_text_display"
758
+ ],
759
+ lang_dataset_id=self.param_visu_langs[lang_chosen_dataset_id][
760
+ "lang_dataset_id"
761
+ ],
762
+ path_fasttext_model=self.param_visu_langs[lang_chosen_dataset_id][
763
+ "path_fasttext_model"
764
+ ],
765
+ path_sentencepiece_model=self.param_visu_langs[lang_chosen_dataset_id][
766
+ "path_sentencepiece_model"
767
+ ],
768
+ path_kenlm_model=self.param_visu_langs[lang_chosen_dataset_id][
769
+ "path_kenlm_model"
770
+ ],
771
  )
772
  visualization_for_lang.visualization_for_lang()
773
 
774
  def visualization(self):
775
  self.preamble()
776
+ # self.warning_preamble()
777
  self.choose_lang()
778
 
779