mj-new committed on
Commit
5d90238
1 Parent(s): 7504a25

Added totals for size metrics

Browse files
app.py CHANGED
@@ -69,7 +69,7 @@ with analysis_bigos:
69
  df_sum_stats_all_splits = extract_stats_all_splits(df_multindex_all_splits, metrics_features)
70
 
71
  st.subheader("Dataset features (text)")
72
- df_sum_stats_feats_text = df_sum_stats_all_splits[metrics_features[0:2]]
73
  st.dataframe(df_sum_stats_feats_text)
74
 
75
  st.subheader("Dataset features (audio)")
 
69
  df_sum_stats_all_splits = extract_stats_all_splits(df_multindex_all_splits, metrics_features)
70
 
71
  st.subheader("Dataset features (text)")
72
+ df_sum_stats_feats_text = df_sum_stats_all_splits[metrics_features[0:3]]
73
  st.dataframe(df_sum_stats_feats_text)
74
 
75
  st.subheader("Dataset features (audio)")
reports/amu-cai/pl-asr-bigos-v2/dataset_contents.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c1688622bcf255ec8d8f916da14be0db3d99a3739399a180114e3cac91babd0
3
  size 46668863
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9d906a6794c9928df18c639c5b6095f304a64e3b1aa89a7a7ae62ec6e5ea398
3
  size 46668863
reports/amu-cai/pl-asr-bigos-v2/dataset_statistics.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3da95373d89bfbfa7b9785a6c5ff73d274cc44bef31f1a7bf24be5ed25fe7af6
3
- size 22620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c6f9fbd2061fe698519347857bd047c5d756d9358d988d86f8684d7d533c390
3
+ size 22627
reports/pelcra/pl-asr-pelcra-for-bigos/dataset_contents.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e64390c8275e4c2593f84b50d0334d158f8930349946467fd6e461517ff1c213
3
  size 95274266
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341199e4818ae3327eb100d85d0e2ed4d7a3bf81d0f70914d3c434ad763eb30c
3
  size 95274266
reports/pelcra/pl-asr-pelcra-for-bigos/dataset_statistics.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3141e906850013e858c109d4b365c680ebb0515f4e858e7e67257b511410d83d
3
- size 29086
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f591c54e5a1daedc640482f88fd65acd0b2fb193e01f4ed58fb81c54d70cea
3
+ size 29098
utils.py CHANGED
@@ -192,7 +192,7 @@ def uniq_chars_per_split(dataset_hf, dataset_hf_secret):
192
  words_all = " ".join(utts_all).split(" ")
193
  words_uniq = list(set(words_all))
194
  chars_uniq = list(set("".join(words_uniq)))
195
- chars_uniq_count = len(chars_uniq)
196
  #print(split, chars_uniq_count)
197
  out_dict[split] = chars_uniq_count
198
  out_chars_list = out_chars_list + chars_uniq
@@ -560,7 +560,8 @@ def extract_stats_to_agg(df_multindex_per_split, metrics, add_total=True):
560
  df_agg_splits.columns = df_agg_splits.columns.droplevel(0)
561
 
562
  if(add_total):
563
- df_agg_splits = df_agg_splits
 
564
 
565
  return(df_agg_splits)
566
 
 
192
  words_all = " ".join(utts_all).split(" ")
193
  words_uniq = list(set(words_all))
194
  chars_uniq = list(set("".join(words_uniq)))
195
+ chars_uniq_count = len(chars_uniq) + 1
196
  #print(split, chars_uniq_count)
197
  out_dict[split] = chars_uniq_count
198
  out_chars_list = out_chars_list + chars_uniq
 
560
  df_agg_splits.columns = df_agg_splits.columns.droplevel(0)
561
 
562
  if(add_total):
563
+ # add row with the sum of all rows
564
+ df_agg_splits.loc['total'] = df_agg_splits.sum()
565
 
566
  return(df_agg_splits)
567