Spaces:
Sleeping
Sleeping
mj-new
committed on
Commit
·
5d90238
1
Parent(s):
7504a25
Added totals for size metrics
Browse files
app.py
CHANGED
@@ -69,7 +69,7 @@ with analysis_bigos:
|
|
69 |
df_sum_stats_all_splits = extract_stats_all_splits(df_multindex_all_splits, metrics_features)
|
70 |
|
71 |
st.subheader("Dataset features (text)")
|
72 |
-
df_sum_stats_feats_text = df_sum_stats_all_splits[metrics_features[0:
|
73 |
st.dataframe(df_sum_stats_feats_text)
|
74 |
|
75 |
st.subheader("Dataset features (audio)")
|
|
|
69 |
df_sum_stats_all_splits = extract_stats_all_splits(df_multindex_all_splits, metrics_features)
|
70 |
|
71 |
st.subheader("Dataset features (text)")
|
72 |
+
df_sum_stats_feats_text = df_sum_stats_all_splits[metrics_features[0:3]]
|
73 |
st.dataframe(df_sum_stats_feats_text)
|
74 |
|
75 |
st.subheader("Dataset features (audio)")
|
reports/amu-cai/pl-asr-bigos-v2/dataset_contents.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 46668863
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9d906a6794c9928df18c639c5b6095f304a64e3b1aa89a7a7ae62ec6e5ea398
|
3 |
size 46668863
|
reports/amu-cai/pl-asr-bigos-v2/dataset_statistics.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8c6f9fbd2061fe698519347857bd047c5d756d9358d988d86f8684d7d533c390
|
3 |
+
size 22627
|
reports/pelcra/pl-asr-pelcra-for-bigos/dataset_contents.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 95274266
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:341199e4818ae3327eb100d85d0e2ed4d7a3bf81d0f70914d3c434ad763eb30c
|
3 |
size 95274266
|
reports/pelcra/pl-asr-pelcra-for-bigos/dataset_statistics.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65f591c54e5a1daedc640482f88fd65acd0b2fb193e01f4ed58fb81c54d70cea
|
3 |
+
size 29098
|
utils.py
CHANGED
@@ -192,7 +192,7 @@ def uniq_chars_per_split(dataset_hf, dataset_hf_secret):
|
|
192 |
words_all = " ".join(utts_all).split(" ")
|
193 |
words_uniq = list(set(words_all))
|
194 |
chars_uniq = list(set("".join(words_uniq)))
|
195 |
-
chars_uniq_count = len(chars_uniq)
|
196 |
#print(split, chars_uniq_count)
|
197 |
out_dict[split] = chars_uniq_count
|
198 |
out_chars_list = out_chars_list + chars_uniq
|
@@ -560,7 +560,8 @@ def extract_stats_to_agg(df_multindex_per_split, metrics, add_total=True):
|
|
560 |
df_agg_splits.columns = df_agg_splits.columns.droplevel(0)
|
561 |
|
562 |
if(add_total):
|
563 |
-
|
|
|
564 |
|
565 |
return(df_agg_splits)
|
566 |
|
|
|
192 |
words_all = " ".join(utts_all).split(" ")
|
193 |
words_uniq = list(set(words_all))
|
194 |
chars_uniq = list(set("".join(words_uniq)))
|
195 |
+
chars_uniq_count = len(chars_uniq) + 1
|
196 |
#print(split, chars_uniq_count)
|
197 |
out_dict[split] = chars_uniq_count
|
198 |
out_chars_list = out_chars_list + chars_uniq
|
|
|
560 |
df_agg_splits.columns = df_agg_splits.columns.droplevel(0)
|
561 |
|
562 |
if(add_total):
|
563 |
+
# add row with the sum of all rows
|
564 |
+
df_agg_splits.loc['total'] = df_agg_splits.sum()
|
565 |
|
566 |
return(df_agg_splits)
|
567 |
|