sanchit-gandhi (HF staff) committed on
Commit c5fe7df
1 Parent(s): a804208

Create app.py

Files changed (1)
  1. app.py +186 -0
app.py ADDED
@@ -0,0 +1,186 @@
+ import streamlit as st
+ from pandas import read_csv, DataFrame, concat
+ import os
+ from evaluate import load
+ from huggingface_hub import Repository
+ import zipfile
+
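+ # NOTE: HF_TOKEN must be available in the environment (e.g. as a Space secret)
+ # so that the app can clone and push the reference/submission repos defined below.
+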
+ # first define URLs for the reference and submission datasets on the Hub
+ REFERENCE_NAME = "references"
+ SUBMISSION_NAME = "submissions"
+
+ REFERENCE_URL = os.path.join(
+     "https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME
+ )
+ SUBMISSION_URL = os.path.join(
+     "https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME
+ )
+
+ # grab these repos using the token provided
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ reference_repo = Repository(
+     local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN
+ )
+ submission_repo = Repository(
+     local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN
+ )
+ submission_repo.git_pull()
+
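+ # every folder in the submissions repo (other than .git) corresponds to one prior submission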
+ all_submissions = [
+     folder
+     for folder in os.listdir(SUBMISSION_NAME)
+     if os.path.isdir(os.path.join(SUBMISSION_NAME, folder)) and folder != ".git"
+ ]
+
+ # define the XTREME-S test sets
+ TEST_SETS = [
+     "fleurs",
+     "mls",
+     "vp",
+     "covost-2",
+     "f-lid",
+     "m-14",
+ ]
+
+ EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
+
+ # define the optional test sets
+ OPTIONAL_TEST_SETS = ["f-r5"]
+ OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
+
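+ # each test set is scored with the metric for its task family: WER for the ASR sets
+ # (fleurs, mls, vp), BLEU for speech translation (covost-2), accuracy for language
+ # identification (f-lid) and F1 for intent classification (m-14)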
+ # load all metrics
+ wer_metric = load("wer")
+ bleu_metric = load("bleu")
+ acc_metric = load("accuracy")
+ f1_metric = load("f1")
+
+ # map test set to metric
+ METRIC_MAP = {
+     "fleurs": wer_metric,
+     "mls": wer_metric,
+     "vp": wer_metric,
+     "covost-2": bleu_metric,
+     "f-lid": acc_metric,
+     "m-14": f1_metric,
+ }
+
+
+ def compute_score(pred_file, ref_file, metric):
+     """Assess predicted file against reference file for a given metric."""
+     with open(pred_file, "r", encoding="utf-8") as pred, open(
+         ref_file, "r", encoding="utf-8"
+     ) as ref:
+         # TODO: any post-processing required? predictions and references are aligned line-by-line
+         pred_lines = [line.strip() for line in pred.readlines()]
+         ref_lines = [line.strip() for line in ref.readlines()]
+
+     score = metric.compute(references=ref_lines, predictions=pred_lines)
+     # bleu/accuracy/f1 return a dict keyed by the metric name; wer returns a float directly
+     if isinstance(score, dict):
+         score = next(iter(score.values()))
+     return score
+
+ # load up the results file
+ CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
+
+ all_results = read_csv(CSV_RESULTS_FILE)
+
+ # write the leaderboard table from the CSV
+ table = all_results.copy()
+
+ # make sure the column ordering is correct (name, average-score, fleurs, mls, ...)
+ average_column = table.pop("average-score")
+ name_column = table.pop("name")
+ table.insert(0, "average-score", average_column)
+ table = table.select_dtypes(exclude=["object", "string"])
+ table.insert(0, "name", name_column)
+ table = table.sort_values(by=["average-score"], ascending=False, ignore_index=True)
+ table = table.round(2)
+ table.index = table.index + 1
+
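+ # everything below renders the leaderboard page and handles new submissions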
+ # Streamlit
+ st.markdown("# XTREME-S: Evaluating Cross-lingual Speech Representations")
+
+ st.markdown(
+     f"""
+ This is the leaderboard of the XTREME-S benchmark.
+ Submitted systems are ranked by the **XTREME-S score**, which is computed over all of the
+ mandatory test sets: {", ".join(TEST_SETS)}. The optional f-r5 test set does not contribute to the score."""
+ )
+
+ # st.table(table)
+ st.dataframe(table.style.format(subset=["average-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
+
+ st.markdown(
+     """
+ XTREME-S was proposed in *XTREME-S: Evaluating Cross-lingual Speech Representations*, by Conneau et al.
+ \n
+ The abstract of the paper is as follows:
+ \n
+ *We introduce XTREME-S, a new benchmark to evaluate universal cross-lingual speech representations in many languages. XTREME-S covers four task families: speech recognition, classification, speech-to-text translation and retrieval. Covering 102 languages from 10+ language families, 3 different domains and 4 task families, XTREME-S aims to simplify multilingual speech representation evaluation, as well as catalyze research in "universal" speech representation learning. This paper describes the new benchmark and establishes the first speech-only and speech-text baselines using XLS-R and mSLAM on all downstream tasks. We motivate the design choices and detail how to use the benchmark.*
+ \n
+ For more information, refer to the paper submission on [arXiv](https://arxiv.org/abs/2203.10752).
+ """
+ )
+
+ st.markdown(
+     """
+ ## Submitting to XTREME-S
+ \n
+ To submit to XTREME-S, download the audio data for the mandatory XTREME-S test sets from [xtreme-s/datasets](https://huggingface.co/datasets/xtreme-s/datasets). The test sets contain audio data only. Evaluate your system on the six mandatory test sets by generating predictions for the unlabelled audio samples. For each test set, save the predictions to a .txt file in the order that the audio samples are provided, with one prediction per line. Name the .txt file according to the XTREME-S test set names shown in the table (e.g. the predictions for FLEURS should be named fleurs.txt).
+ \n
+ Once you have evaluated your system on all six mandatory test sets, move the predictions into one folder and zip it. The name you assign to the zipped folder will be the name shown on the leaderboard (e.g. mSLAM.zip will be displayed as mSLAM). Upload your zipped submission for scoring and placement on the leaderboard.
+ \n
+ Should you experience any issues, open a [new discussion](https://huggingface.co/spaces/xtreme-s/leaderboard/discussions/new) and tag `@sanchit-gandhi`.
+ """
+ )
+
+ # submission form: upload a zipped folder of predictions for scoring
+ with st.form(key="my_form"):
+     uploaded_file = st.file_uploader("Choose a zip file")
+     submit_button = st.form_submit_button(label="Submit")
+
+ if submit_button:
+     if uploaded_file is None:
+         raise ValueError("Please make sure to have uploaded a zip file.")
+
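+     # the zip file name (minus the .zip extension) becomes the system name shown on the leaderboard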
+     submission = uploaded_file.name.split(".zip")[0]
+     with st.spinner(f"Uploading {submission}..."):
+         with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+             zip_ref.extractall(submission_repo.local_dir)
+         submission_repo.push_to_hub()
+
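+     # score the extracted predictions against the references for every mandatory test set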
+     with st.spinner(f"Computing XTREME-S Score for {submission}..."):
+         results = {"name": submission}
+         all_submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))
+
+         submitted_files = [f for f in all_submitted_files if f in EXPECTED_TEST_FILES]
+         submitted_optional_files = [f for f in all_submitted_files if f in OPTIONAL_TEST_FILES]
+
+         if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
+             raise ValueError(
+                 f"{', '.join(submitted_files)} were submitted, but expected {', '.join(EXPECTED_TEST_FILES)}"
+             )
+
+         for file in submitted_files:
+             ref_file = os.path.join(REFERENCE_NAME, file)
+             pred_file = os.path.join(SUBMISSION_NAME, submission, file)
+
+             test_set = file.split(".")[0]
+             metric = METRIC_MAP[test_set]
+
+             score = compute_score(pred_file, ref_file, metric)
+             results[test_set] = round(100 * score, 2)
+
+         # TODO: assessment of 'optional' test sets
+
+         # XTREME-S score is computed over the mandatory test sets only
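+         # weighting: 40% ASR (reported as 100 - mean WER), 40% speech translation BLEU,
+         # and 20% the mean of the classification scores (f-lid accuracy and m-14 F1)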
+         average_score = (
+             0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
+             + 0.4 * results["covost-2"]
+             + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+         )
+         results["average-score"] = round(average_score, 2)
+
+         all_results = concat([all_results, DataFrame([results])], ignore_index=True)
+
+         # save and upload the new evaluated results
+         all_results.to_csv(CSV_RESULTS_FILE, index=False)
+         commit_url = submission_repo.push_to_hub()
+
+     st.success('Please refresh this space (CTRL+R) to see your result')