sanchit-gandhi (HF staff) committed on
Commit c5fe7df
1 Parent(s): a804208

Create app.py

Files changed (1)
  1. app.py +186 -0
app.py ADDED
@@ -0,0 +1,186 @@
+ import streamlit as st
+ from pandas import read_csv, DataFrame, concat
+ import os
+ from evaluate import load
+ from huggingface_hub import Repository
+ import zipfile
+
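+ # NOTE: HF_TOKEN must be available in the environment (e.g. as a Space secret)
+ # so that the app can clone and push the reference/submission repos defined below.
+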
+ # first define URLs for the reference and submission datasets on the Hub
+ REFERENCE_NAME = "references"
+ SUBMISSION_NAME = "submissions"
+
+ REFERENCE_URL = os.path.join(
+     "https://huggingface.co/datasets/xtreme-s", REFERENCE_NAME
+ )
+ SUBMISSION_URL = os.path.join(
+     "https://huggingface.co/datasets/xtreme-s", SUBMISSION_NAME
+ )
+
+ # grab these repos using the token provided
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+
+ reference_repo = Repository(
+     local_dir="references", clone_from=REFERENCE_URL, use_auth_token=HF_TOKEN
+ )
+ submission_repo = Repository(
+     local_dir="submissions", clone_from=SUBMISSION_URL, use_auth_token=HF_TOKEN
+ )
+ submission_repo.git_pull()
+
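+ # every folder in the submissions repo (other than .git) corresponds to one prior submission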
+ all_submissions = [
+     folder
+     for folder in os.listdir(SUBMISSION_NAME)
+     if os.path.isdir(os.path.join(SUBMISSION_NAME, folder)) and folder != ".git"
+ ]
+
+ # define the XTREME-S test sets
+ TEST_SETS = [
+     "fleurs",
+     "mls",
+     "vp",
+     "covost-2",
+     "f-lid",
+     "m-14",
+ ]
+
+ EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
+
+ # define the optional test sets
+ OPTIONAL_TEST_SETS = ["f-r5"]
+ OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
+
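+ # each test set is scored with the metric for its task family: WER for the ASR sets
+ # (fleurs, mls, vp), BLEU for speech translation (covost-2), accuracy for language
+ # identification (f-lid) and F1 for intent classification (m-14)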
+ # load all metrics
+ wer_metric = load("wer")
+ bleu_metric = load("bleu")
+ acc_metric = load("accuracy")
+ f1_metric = load("f1")
+
+ # map test set to metric
+ METRIC_MAP = {
+     "fleurs": wer_metric,
+     "mls": wer_metric,
+     "vp": wer_metric,
+     "covost-2": bleu_metric,
+     "f-lid": acc_metric,
+     "m-14": f1_metric,
+ }
+
+
+ def compute_score(pred_file, ref_file, metric):
+     """Assess predicted file against reference file for a given metric."""
+     with open(pred_file, "r", encoding="utf-8") as pred, open(
+         ref_file, "r", encoding="utf-8"
+     ) as ref:
+         # TODO: any post-processing required? predictions and references are aligned line-by-line
+         pred_lines = [line.strip() for line in pred.readlines()]
+         ref_lines = [line.strip() for line in ref.readlines()]
+
+     score = metric.compute(references=ref_lines, predictions=pred_lines)
+     # bleu/accuracy/f1 return a dict keyed by the metric name; wer returns a float directly
+     if isinstance(score, dict):
+         score = next(iter(score.values()))
+     return score
+
+ # load up the results file
+ CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
+
+ all_results = read_csv(CSV_RESULTS_FILE)
+
+ # write the leaderboard table from the CSV
+ table = all_results.copy()
+
+ # make sure the column ordering is correct (name, average-score, fleurs, mls, ...)
+ average_column = table.pop("average-score")
+ name_column = table.pop("name")
+ table.insert(0, "average-score", average_column)
+ table = table.select_dtypes(exclude=["object", "string"])
+ table.insert(0, "name", name_column)
+ table = table.sort_values(by=["average-score"], ascending=False, ignore_index=True)
+ table = table.round(2)
+ table.index = table.index + 1
+
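+ # everything below renders the leaderboard page and handles new submissions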
+ # Streamlit
+ st.markdown("# XTREME-S: Evaluating Cross-lingual Speech Representations")
+
+ st.markdown(
+     f"""
+ This is the leaderboard of the XTREME-S benchmark.
+ Submitted systems are ranked by the **XTREME-S score**, which is computed over all of the
+ mandatory test sets: {", ".join(TEST_SETS)}. The optional f-r5 test set does not contribute to the score."""
+ )
+
+ # st.table(table)
+ st.dataframe(table.style.format(subset=["average-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
+
+ st.markdown(
+     """
+ XTREME-S was proposed in *XTREME-S: Evaluating Cross-lingual Speech Representations*, by Conneau et al.
+ \n
+ The abstract of the paper is as follows:
+ \n
+ *We introduce XTREME-S, a new benchmark to evaluate universal cross-lingual speech representations in many languages. XTREME-S covers four task families: speech recognition, classification, speech-to-text translation and retrieval. Covering 102 languages from 10+ language families, 3 different domains and 4 task families, XTREME-S aims to simplify multilingual speech representation evaluation, as well as catalyze research in "universal" speech representation learning. This paper describes the new benchmark and establishes the first speech-only and speech-text baselines using XLS-R and mSLAM on all downstream tasks. We motivate the design choices and detail how to use the benchmark.*
+ \n
+ For more information, refer to the paper submission on [arXiv](https://arxiv.org/abs/2203.10752).
+ """
+ )
+
+ st.markdown(
+     """
+ ## Submitting to XTREME-S
+ \n
+ To submit to XTREME-S, download the audio data for the mandatory XTREME-S test sets from [xtreme-s/datasets](https://huggingface.co/datasets/xtreme-s/datasets). The test sets contain audio data only. Evaluate your system on the six mandatory test sets by generating predictions for the unlabelled audio samples. For each test set, save the predictions to a .txt file in the order that the audio samples are provided, with one prediction per line. Name the .txt file according to the XTREME-S test set names shown in the table (e.g. the predictions for FLEURS should be named fleurs.txt).
+ \n
+ Once you have evaluated your system on all six mandatory test sets, move the predictions into one folder and zip it. The name you assign to the zipped folder will be the name shown on the leaderboard (e.g. mSLAM.zip will be displayed as mSLAM). Upload your zipped submission for scoring and placement on the leaderboard.
+ \n
+ Should you experience any issues, open a [new discussion](https://huggingface.co/spaces/xtreme-s/leaderboard/discussions/new) and tag `@sanchit-gandhi`.
+ """
+ )
+
+ # submission form: upload a zipped folder of predictions for scoring
+ with st.form(key="my_form"):
+     uploaded_file = st.file_uploader("Choose a zip file")
+     submit_button = st.form_submit_button(label="Submit")
+
+ if submit_button:
+     if uploaded_file is None:
+         raise ValueError("Please make sure to have uploaded a zip file.")
+
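+     # the zip file name (minus the .zip extension) becomes the system name shown on the leaderboard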
+     submission = uploaded_file.name.split(".zip")[0]
+     with st.spinner(f"Uploading {submission}..."):
+         with zipfile.ZipFile(uploaded_file, 'r') as zip_ref:
+             zip_ref.extractall(submission_repo.local_dir)
+         submission_repo.push_to_hub()
+
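+     # score the extracted predictions against the references for every mandatory test set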
+     with st.spinner(f"Computing XTREME-S Score for {submission}..."):
+         results = {"name": submission}
+         all_submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))
+
+         submitted_files = [f for f in all_submitted_files if f in EXPECTED_TEST_FILES]
+         submitted_optional_files = [f for f in all_submitted_files if f in OPTIONAL_TEST_FILES]
+
+         if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
+             raise ValueError(
+                 f"{', '.join(submitted_files)} were submitted, but expected {', '.join(EXPECTED_TEST_FILES)}"
+             )
+
+         for file in submitted_files:
+             ref_file = os.path.join(REFERENCE_NAME, file)
+             pred_file = os.path.join(SUBMISSION_NAME, submission, file)
+
+             test_set = file.split(".")[0]
+             metric = METRIC_MAP[test_set]
+
+             score = compute_score(pred_file, ref_file, metric)
+             results[test_set] = round(100 * score, 2)
+
+         # TODO: assessment of 'optional' test sets
+
+         # XTREME-S score is computed over the mandatory test sets only
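+         # weighting: 40% ASR (reported as 100 - mean WER), 40% speech translation BLEU,
+         # and 20% the mean of the classification scores (f-lid accuracy and m-14 F1)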
+         average_score = (
+             0.4 * (100 - (results["fleurs"] + results["mls"] + results["vp"]) / 3)
+             + 0.4 * results["covost-2"]
+             + 0.2 * (results["f-lid"] + results["m-14"]) / 2
+         )
+         results["average-score"] = round(average_score, 2)
+
+         all_results = concat([all_results, DataFrame([results])], ignore_index=True)
+
+         # save and upload the new evaluated results
+         all_results.to_csv(CSV_RESULTS_FILE, index=False)
+         commit_url = submission_repo.push_to_hub()
+
+     st.success('Please refresh this space (CTRL+R) to see your result')