Yacine Jernite committed on
Commit
9bb1a4c
2 Parent(s): 1a77039 e0ada71

Merge branch 'main' of https://huggingface.co/spaces/huggingface/data-measurements-tool-2 into main

Browse files
app.py CHANGED
@@ -117,7 +117,10 @@ def load_or_prepare(ds_args, show_embeddings, use_cache=False):
117
  logs.warning("Loading Embeddings")
118
  dstats.load_or_prepare_embeddings()
119
  logs.warning("Loading nPMI")
120
- dstats.load_or_prepare_npmi()
 
 
 
121
  logs.warning("Loading Zipf")
122
  dstats.load_or_prepare_zipf()
123
  return dstats
 
117
  logs.warning("Loading Embeddings")
118
  dstats.load_or_prepare_embeddings()
119
  logs.warning("Loading nPMI")
120
+ try:
121
+ dstats.load_or_prepare_npmi()
122
+ except:
123
+ logs.warning("Missing a cache for npmi")
124
  logs.warning("Loading Zipf")
125
  dstats.load_or_prepare_zipf()
126
  return dstats
cache_dir/c4_realnewslike_train_text/text_dset/dataset.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9813f70c9be641905ca737aa8f16e29d6aa17155a76cd830e7a627aed91431f4
3
+ size 529606944
cache_dir/c4_realnewslike_train_text/text_dset/dataset_info.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff9f59542efc98b40f23b64408e3fbaed544ad8f0d1fb1e7126ead5af52844ac
3
+ size 945
cache_dir/c4_realnewslike_train_text/text_dset/state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2f6884f5ee381e5df2d267dae699aaf4792ba06c8f16830c9c19c144b4b3003
3
+ size 256
cache_dir/squad_v2_squad_v2_train_title/text_dset/dataset.arrow ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1900ec14c31551d40328c314dc2bf9f9a868b201a50a0a811ff81cc5013f03f7
3
+ size 2414544
cache_dir/squad_v2_squad_v2_train_title/text_dset/dataset_info.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:622d414fc0ad5999a683527b4fe5f08608085c66fcd590a66a461b89858349a5
3
+ size 2085
cache_dir/squad_v2_squad_v2_train_title/text_dset/state.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee31a935ee51d6d450089aa2d477d6db8e39d20076ad0ce8a204676a8e2c43c6
3
+ size 256
data_measurements/dataset_statistics.py CHANGED
@@ -498,7 +498,7 @@ class DatasetStatisticsCacheClass:
498
  if not self.live:
499
  if self.tokenized_df is None:
500
  logs.warning("Tokenized dataset not yet loaded; doing so.")
501
- self.load_or_prepare_dataset()
502
  if self.vocab_counts_df is None:
503
  logs.warning("Vocab not yet loaded; doing so.")
504
  self.load_or_prepare_vocab()
@@ -544,8 +544,8 @@ class DatasetStatisticsCacheClass:
544
  """
545
  logs.info("Doing text dset.")
546
  self.load_or_prepare_text_dset(save)
547
- logs.info("Doing tokenized dataframe")
548
- self.load_or_prepare_tokenized_df(save)
549
  logs.info("Doing dataset peek")
550
  self.load_or_prepare_dset_peek(save)
551
 
 
498
  if not self.live:
499
  if self.tokenized_df is None:
500
  logs.warning("Tokenized dataset not yet loaded; doing so.")
501
+ self.load_or_prepare_tokenized_df()
502
  if self.vocab_counts_df is None:
503
  logs.warning("Vocab not yet loaded; doing so.")
504
  self.load_or_prepare_vocab()
 
544
  """
545
  logs.info("Doing text dset.")
546
  self.load_or_prepare_text_dset(save)
547
+ #logs.info("Doing tokenized dataframe")
548
+ #self.load_or_prepare_tokenized_df(save)
549
  logs.info("Doing dataset peek")
550
  self.load_or_prepare_dset_peek(save)
551
 
data_measurements/streamlit_utils.py CHANGED
@@ -20,7 +20,7 @@ import streamlit as st
20
  from st_aggrid import AgGrid, GridOptionsBuilder
21
 
22
  from .dataset_utils import HF_DESC_FIELD, HF_FEATURE_FIELD, HF_LABEL_FIELD
23
-
24
 
25
  def sidebar_header():
26
  st.sidebar.markdown(
 
20
  from st_aggrid import AgGrid, GridOptionsBuilder
21
 
22
  from .dataset_utils import HF_DESC_FIELD, HF_FEATURE_FIELD, HF_LABEL_FIELD
23
+ st.set_option('deprecation.showPyplotGlobalUse', False)
24
 
25
  def sidebar_header():
26
  st.sidebar.markdown(