Commit bd61ad8, committed by sanchit-gandhi (HF staff)
1 Parent(s): aca408b

Update app.py

Files changed (1)
  1. app.py +23 -26
app.py CHANGED
@@ -9,22 +9,14 @@ import zipfile
 REFERENCE_NAME = "references"
 SUBMISSION_NAME = "submissions"
 
-REFERENCE_URL = os.path.join(
-    "https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME
-)
-SUBMISSION_URL = os.path.join(
-    "https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME
-)
+REFERENCE_URL = os.path.join("https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME)
+SUBMISSION_URL = os.path.join("https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME)
 
 # grab these repos using the token provided
 HF_TOKEN = os.environ.get("HF_TOKEN")
 
-reference_repo = Repository(
-    local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN
-)
-submission_repo = Repository(
-    local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN
-)
+reference_repo = Repository(local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN)
+submission_repo = Repository(local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN)
 submission_repo.git_pull()
 
 all_submissions = [
@@ -46,7 +38,7 @@ TEST_SETS = [
 EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
 
 # define the optional test sets - ignore for now
-OPTIONAL_TEST_SETS = [] #["f-r5"]
+OPTIONAL_TEST_SETS = []  # ["f-r5"]
 OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
 
 # load all metrics
@@ -68,9 +60,7 @@ METRIC_MAP = {
 
 def compute_score(pred_file, ref_file, metric):
     """Assess predicted file against reference file for a given metric."""
-    with open(pred_file, "r", encoding="utf-8") as pred, open(
-        ref_file, "r", encoding="utf-8"
-    ) as ref:
+    with open(pred_file, "r", encoding="utf-8") as pred, open(ref_file, "r", encoding="utf-8") as ref:
         # TODO: any post-processing required?
         pred_lines = [line.strip() for line in pred.readlines()]
         ref_lines = [line.strip() for line in ref.readlines()]
@@ -78,6 +68,7 @@ def compute_score(pred_file, ref_file, metric):
     score = metric(ref_lines, pred_lines)
     return score
 
+
 # load up the results file
 CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
 
@@ -103,18 +94,21 @@ table.index = table.index + 1
 st.markdown("# XTREME-S: Evaluating Cross-lingual Speech Representations")
 
 st.markdown(
-    """
-    This is the leaderboard for the XTREME-S benchmark.
-    Submitted systems are ranked by the **average score**, which is a weighted average of
-    the mandatory test sets:
+    "This is the leaderboard for the XTREME-S benchmark. Submitted systems are ranked by the **average score**, which"
+    " is a weighted average of the mandatory test sets:"
+)
+# hacky way of getting math-mode to render
+st.write(
+    r"""
     $$
     \begin{gathered}
     0.4 *\left(100-\frac{\text{Fleurs}+\text{MLS}+\text{VP}}{3}\right)_{(\mathrm{WER})}+ \\
     0.4 * \text{CoVoST}-2_{(\mathrm{BLEU})}+0.2 *\left(\frac{\text{F-LID}+\text{M-14}}{2}\right)_{(\mathrm{Acc})}
     \end{gathered}
     $$
-    The optional dataset of f-r5 does not contribute to the average score."""
+    """
 )
+st.markdown("The optional dataset of f-r5 does not contribute to the average score.")
 
 # st.table(table)
 st.dataframe(table.style.format(subset=["average-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
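As a sanity check on the weighted average rendered in the hunk above (and computed as `average_score` further down in this diff), here is a minimal sketch with made-up per-task scores; the numbers are purely illustrative and do not come from any real submission:

```python
# Hypothetical per-task scores (percentages), purely illustrative
results = {
    "fleurs": 10.0,  # WER (lower is better)
    "mls": 12.0,  # WER
    "vp": 11.0,  # WER
    "covost-2": 25.0,  # BLEU (higher is better)
    "f-lid": 90.0,  # accuracy
    "m-14": 80.0,  # accuracy
}

# same weighting as the formula above: 0.4 * (100 - mean WER) + 0.4 * BLEU + 0.2 * mean accuracy
average_score = (
    0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
    + 0.4 * results["covost-2"]
    + 0.2 * (results["f-lid"] + results["m-14"]) / 2
)
print(round(average_score, 2))  # 0.4 * 89.0 + 0.4 * 25.0 + 0.2 * 85.0 = 62.6
```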
@@ -154,7 +148,7 @@ if submit_button:
 
     submission = uploaded_file.name.split(".zip")[0]
     with st.spinner(f"Uploading {submission}..."):
-        with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+        with zipfile.ZipFile(uploaded_file, "r") as zip_ref:
             zip_ref.extractall(submission_repo.local_dir)
         submission_repo.push_to_hub()
 
@@ -179,12 +173,15 @@ if submit_button:
 
         score = compute_score(pred_file, ref_file, metric)
         results[test_set] = round(100 * score, 2)
-
+
     # TODO: assessment of 'optional' test sets
 
     # XTREME-S score is computed over the mandatory test sets only
-    average_score = 0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3) + 0.4 * results[
-        "covost-2"] + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+    average_score = (
+        0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
+        + 0.4 * results["covost-2"]
+        + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+    )
     results["average-score"] = round(average_score, 2)
 
     all_results = all_results.append(results, ignore_index=True)
@@ -193,4 +190,4 @@ if submit_button:
     all_results.to_csv(CSV_RESULTS_FILE, index=False)
     commit_url = submission_repo.push_to_hub()
 
-    st.success('Please refresh this space (CTRL+R) to see your result')
+    st.success("Please refresh this space (CTRL+R) to see your result")
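For context on how the `compute_score` helper reformatted above is used, here is a minimal, self-contained sketch of scoring a single test set. The file paths, the `wer` wrapper, and the use of the `evaluate` library are assumptions for illustration; the Space builds its own metric callables via `METRIC_MAP`, which is not shown in this diff:

```python
import evaluate  # assumed dependency; the Space's METRIC_MAP construction is not shown in this diff

wer_metric = evaluate.load("wer")


def wer(ref_lines, pred_lines):
    # mirrors the metric(ref_lines, pred_lines) call signature used by compute_score
    return wer_metric.compute(references=ref_lines, predictions=pred_lines)


def compute_score(pred_file, ref_file, metric):
    """Assess predicted file against reference file for a given metric."""
    with open(pred_file, "r", encoding="utf-8") as pred, open(ref_file, "r", encoding="utf-8") as ref:
        pred_lines = [line.strip() for line in pred.readlines()]
        ref_lines = [line.strip() for line in ref.readlines()]
    return metric(ref_lines, pred_lines)


# hypothetical paths: one predictions file per test set, aligned line-by-line with the reference file
score = compute_score("my-system/fleurs.txt", "references/fleurs.txt", wer)
print(round(100 * score, 2))
```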
 
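Finally, for anyone preparing an upload, a rough sketch of the kind of zip the uploader above appears to expect: one `<test-set>.txt` file per mandatory test set (cf. `EXPECTED_TEST_FILES`), with one prediction per line. The exact test-set names and the folder layout inside the archive are assumptions inferred from this diff, not an official specification:

```python
import zipfile

# test-set names as they appear in the average-score computation above (assumed to match TEST_SETS)
test_sets = ["fleurs", "mls", "vp", "covost-2", "f-lid", "m-14"]

with zipfile.ZipFile("my-system.zip", "w") as zf:
    for test_set in test_sets:
        # placeholder predictions; real files hold one hypothesis per reference line
        zf.writestr(f"my-system/{test_set}.txt", "prediction 1\nprediction 2\n")
```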