yourusername committed on
Commit
78cc3f9
1 Parent(s): e1cd6af

:bug: filter_vocab -> filter_words

Browse files
data_measurements/dataset_statistics.py CHANGED
@@ -341,7 +341,7 @@ class DatasetStatisticsCacheClass:
341
  ):
342
  logs.info("Reading vocab from cache")
343
  self.load_vocab()
344
- self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
345
  else:
346
  logs.info("Calculating vocab afresh")
347
  if len(self.tokenized_df) == 0:
@@ -352,7 +352,7 @@ class DatasetStatisticsCacheClass:
352
  word_count_df = count_vocab_frequencies(self.tokenized_df)
353
  logs.info("Making dfs with proportion.")
354
  self.vocab_counts_df = calc_p_word(word_count_df)
355
- self.vocab_counts_filtered_df = filter_vocab(self.vocab_counts_df)
356
  if save:
357
  logs.info("Writing out.")
358
  write_df(self.vocab_counts_df, self.vocab_counts_df_fid)
341
  ):
342
  logs.info("Reading vocab from cache")
343
  self.load_vocab()
344
+ self.vocab_counts_filtered_df = filter_words(self.vocab_counts_df)
345
  else:
346
  logs.info("Calculating vocab afresh")
347
  if len(self.tokenized_df) == 0:
352
  word_count_df = count_vocab_frequencies(self.tokenized_df)
353
  logs.info("Making dfs with proportion.")
354
  self.vocab_counts_df = calc_p_word(word_count_df)
355
+ self.vocab_counts_filtered_df = filter_words(self.vocab_counts_df)
356
  if save:
357
  logs.info("Writing out.")
358
  write_df(self.vocab_counts_df, self.vocab_counts_df_fid)