Commit bd61ad8, committed by sanchit-gandhi (HF staff)
1 Parent(s): aca408b

Update app.py

Files changed (1)
  1. app.py +23 -26
app.py CHANGED
@@ -9,22 +9,14 @@ import zipfile
 REFERENCE_NAME = "references"
 SUBMISSION_NAME = "submissions"
 
-REFERENCE_URL = os.path.join(
-    "https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME
-)
-SUBMISSION_URL = os.path.join(
-    "https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME
-)
+REFERENCE_URL = os.path.join("https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME)
+SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME)
 
 # grab these repos using the token provided
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-reference_repo = Repository(
-    local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN
-)
-submission_repo = Repository(
-    local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN
-)
+reference_repo = Repository(local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN)
+submission_repo = Repository(local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
 submission_repo.git_pull()
 
 all_submissions = [
@@ -46,7 +38,7 @@ TEST_SETS = [
 EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
 
 # define the optional test sets - ignore for now
-OPTIONAL_TEST_SETS = [] #["f-r5"]
+OPTIONAL_TEST_SETS = []  # ["f-r5"]
 OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
 
 # load all metrics
@@ -68,9 +60,7 @@ METRIC_MAP = {
 
 def compute_score(pred_file, ref_file, metric):
     """Assess predicted file against reference file for a given metric."""
-    with open(pred_file, "r", encoding="utf-8") as pred, open(
-        ref_file, "r", encoding="utf-8"
-    ) as ref:
+    with open(pred_file, "r", encoding="utf-8") as pred, open(ref_file, "r", encoding="utf-8") as ref:
         # TODO: any post-processing required?
         pred_lines = [line.strip() for line in pred.readlines()]
         ref_lines = [line.strip() for line in ref.readlines()]
@@ -78,6 +68,7 @@ def compute_score(pred_file, ref_file, metric):
     score = metric(ref_lines, pred_lines)
     return score
 
+
 # load up the results file
 CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
 
@@ -103,18 +94,21 @@ table.index = table.index + 1
 st.markdown("# XTREME-S: Evaluating Cross-lingual Speech Representations")
 
 st.markdown(
-    """
-    This is the leaderboard for the XTREME-S benchmark.
-    Submitted systems are ranked by the **average score**, which is a weighted average of
-    the mandatory test sets:
+    "This is the leaderboard for the XTREME-S benchmark. Submitted systems are ranked by the **average score**, which"
+    " is a weighted average of the mandatory test sets:"
+)
+# hacky way of getting math-mode to render
+st.write(
+    r"""
     $$
     \begin{gathered}
     0.4 *\left(100-\frac{\text{Fleurs}+\text{MLS}+\text{VP}}{3}\right)_{(\mathrm{WER})}+ \\
     0.4 * \text{CoVoST}-2_{(\mathrm{BLEU})}+0.2 *\left(\frac{\text{F-LID}+\text{M-14}}{2}\right)_{(\mathrm{Acc})}
     \end{gathered}
     $$
-    The optional dataset of f-r5 does not contribute to the average score."""
+    """
 )
+st.markdown("The optional dataset of f-r5 does not contribute to the average score.")
 
 # st.table(table)
 st.dataframe(table.style.format(subset=["average-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
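As a sanity check on the weighted average rendered in the hunk above (and computed as `average_score` further down in this diff), here is a minimal sketch with made-up per-task scores; the numbers are purely illustrative and do not come from any real submission:

```python
# Hypothetical per-task scores (percentages), purely illustrative
results = {
    "fleurs": 10.0,  # WER (lower is better)
    "mls": 12.0,  # WER
    "vp": 11.0,  # WER
    "covost-2": 25.0,  # BLEU (higher is better)
    "f-lid": 90.0,  # accuracy
    "m-14": 80.0,  # accuracy
}

# same weighting as the formula above: 0.4 * (100 - mean WER) + 0.4 * BLEU + 0.2 * mean accuracy
average_score = (
    0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
    + 0.4 * results["covost-2"]
    + 0.2 * (results["f-lid"] + results["m-14"]) / 2
)
print(round(average_score, 2))  # 0.4 * 89.0 + 0.4 * 25.0 + 0.2 * 85.0 = 62.6
```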
@@ -154,7 +148,7 @@ if submit_button:
 
     submission = uploaded_file.name.split(".zip")[0]
     with st.spinner(f"Uploading {submission}..."):
-        with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+        with zipfile.ZipFile(uploaded_file, "r") as zip_ref:
             zip_ref.extractall(submission_repo.local_dir)
         submission_repo.push_to_hub()
 
@@ -179,12 +173,15 @@ if submit_button:
 
         score = compute_score(pred_file, ref_file, metric)
         results[test_set] = round(100 * score, 2)
-
+
     # TODO: assessment of 'optional' test sets
 
     # XTREME-S score is computed over the mandatory test sets only
-    average_score = 0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3) + 0.4 * results[
-        "covost-2"] + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+    average_score = (
+        0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
+        + 0.4 * results["covost-2"]
+        + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+    )
     results["average-score"] = round(average_score, 2)
 
     all_results = all_results.append(results, ignore_index=True)
@@ -193,4 +190,4 @@ if submit_button:
     all_results.to_csv(CSV_RESULTS_FILE, index=False)
     commit_url = submission_repo.push_to_hub()
 
-    st.success('Please refresh this space (CTRL+R) to see your result')
+    st.success("Please refresh this space (CTRL+R) to see your result")
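For context on how the `compute_score` helper reformatted above is used, here is a minimal, self-contained sketch of scoring a single test set. The file paths, the `wer` wrapper, and the use of the `evaluate` library are assumptions for illustration; the Space builds its own metric callables via `METRIC_MAP`, which is not shown in this diff:

```python
import evaluate  # assumed dependency; the Space's METRIC_MAP construction is not shown in this diff

wer_metric = evaluate.load("wer")


def wer(ref_lines, pred_lines):
    # mirrors the metric(ref_lines, pred_lines) call signature used by compute_score
    return wer_metric.compute(references=ref_lines, predictions=pred_lines)


def compute_score(pred_file, ref_file, metric):
    """Assess predicted file against reference file for a given metric."""
    with open(pred_file, "r", encoding="utf-8") as pred, open(ref_file, "r", encoding="utf-8") as ref:
        pred_lines = [line.strip() for line in pred.readlines()]
        ref_lines = [line.strip() for line in ref.readlines()]
    return metric(ref_lines, pred_lines)


# hypothetical paths: one predictions file per test set, aligned line-by-line with the reference file
score = compute_score("my-system/fleurs.txt", "references/fleurs.txt", wer)
print(round(100 * score, 2))
```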
 
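Finally, for anyone preparing an upload, a rough sketch of the kind of zip the uploader above appears to expect: one `<test-set>.txt` file per mandatory test set (cf. `EXPECTED_TEST_FILES`), with one prediction per line. The exact test-set names and the folder layout inside the archive are assumptions inferred from this diff, not an official specification:

```python
import zipfile

# test-set names as they appear in the average-score computation above (assumed to match TEST_SETS)
test_sets = ["fleurs", "mls", "vp", "covost-2", "f-lid", "m-14"]

with zipfile.ZipFile("my-system.zip", "w") as zf:
    for test_set in test_sets:
        # placeholder predictions; real files hold one hypothesis per reference line
        zf.writestr(f"my-system/{test_set}.txt", "prediction 1\nprediction 2\n")
```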