Spaces:

Steveeeeeeen
/

Open_ASR_Leaderboard

Running

App Files Files Community

Steveeeeeeen (HF staff) committed 12 days ago

Commit

3dc3962

•

1 Parent(s): 6787ab4

whisper_leaderboard (#1)

Browse files

- added whisper leaderboard (4a6c7b9fe92fc5f986353fdeaebb9abb715f97fd)

Files changed (3) hide show

app.py +77 -6
constants.py +5 -4
init.py +16 -3

app.py CHANGED Viewed

@@ -22,14 +22,32 @@ column_names = {
     "Voxpopuli WER": "Voxpopuli",
 }
-eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
 if not csv_results.exists():
     raise Exception(f"CSV file {csv_results} does not exist locally")
 # Get csv with data and parse columns
 original_df = pd.read_csv(csv_results)
 # Formats the columns
 def formatter(x):
     if type(x) is str:
@@ -43,9 +61,11 @@ for col in original_df.columns:
         original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
     else:
         original_df[col] = original_df[col].apply(formatter) # For numerical values
 original_df.rename(columns=column_names, inplace=True)
 original_df.sort_values(by='Average WER ⬇️', inplace=True)
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
@@ -115,11 +135,62 @@ with gr.Blocks(css=LEADERBOARD_CSS) as demo:
                 interactive=False,
                 visible=True,
                 )
-        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=1):
             gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
-        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=2):
             with gr.Column():
                 gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
             with gr.Column():

     "Voxpopuli WER": "Voxpopuli",
 }
+whisper_column_names = {
+    "MODEL": "Model",
+    "Avg. WER": "Average WER ⬇️",
+    "RTFx": "RTFx ⬆️️",
+    "Backend": "Backend",
+    "Hardware": "Device",
+    "AMI WER": "AMI",
+    "Earnings22 WER": "Earnings22",
+    "Gigaspeech WER": "Gigaspeech",
+    "LS Clean WER": "LS Clean",
+    "LS Other WER": "LS Other",
+    "SPGISpeech WER": "SPGISpeech",
+    "Tedlium WER": "Tedlium",
+    "Voxpopuli WER": "Voxpopuli",
+}
+eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results = load_all_info_from_dataset_hub()
 if not csv_results.exists():
     raise Exception(f"CSV file {csv_results} does not exist locally")
+if not whisper_csv_results.exists():
+    raise Exception(f"CSV file {whisper_csv_results} does not exist locally")
 # Get csv with data and parse columns
 original_df = pd.read_csv(csv_results)
+whisper_df = pd.read_csv(whisper_csv_results)
 # Formats the columns
 def formatter(x):
     if type(x) is str:
         original_df[col] = original_df[col].apply(lambda x: x.replace(x, make_clickable_model(x)))
     else:
         original_df[col] = original_df[col].apply(formatter) # For numerical values
+        whisper_df[col] = whisper_df[col].apply(formatter) # For numerical values
 original_df.rename(columns=column_names, inplace=True)
 original_df.sort_values(by='Average WER ⬇️', inplace=True)
+whisper_df.rename(columns=whisper_column_names, inplace=True)
+whisper_df.sort_values(by='Average WER ⬇️', inplace=True)
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
                 interactive=False,
                 visible=True,
                 )
+        with gr.TabItem("🔄 Whisper Model Leaderboard", elem_id="whisper-backends-tab", id=1):
+            gr.Markdown("## Whisper Model Performance Across Different Backends", elem_classes="markdown-text")
+            gr.Markdown("This table shows how different Whisper model implementations compare in terms of performance and speed.", elem_classes="markdown-text")
+            with gr.Row():
+                backend_filter = gr.Dropdown(
+                    choices=["All"] + sorted(whisper_df["Backend"].unique().tolist()),
+                    value="All",
+                    label="Filter by Backend",
+                    elem_id="backend-filter",
+                    multiselect=True  # Enable multiple selection
+                )
+                device_choices = ["All"] + sorted(whisper_df["Device"].unique().tolist()) if "Device" in whisper_df.columns else ["All"]
+                device_filter = gr.Dropdown(
+                    choices=device_choices,
+                    value="All",
+                    label="Filter by Device",
+                    elem_id="device-filter",
+                    multiselect=True  # Enable multiple selection
+                )
+            whisper_table = gr.components.Dataframe(
+                value=whisper_df,
+                datatype=TYPES,
+                elem_id="whisper-table",
+                interactive=False,
+                visible=True,
+            )
+            def filter_whisper_table(backends, devices):
+                filtered_df = whisper_df.copy()
+                # Handle backend filtering
+                if backends and "All" not in backends:
+                    filtered_df = filtered_df[filtered_df["Backend"].isin(backends)]
+                # Handle device filtering
+                if devices and "All" not in devices and "Device" in filtered_df.columns:
+                    filtered_df = filtered_df[filtered_df["Device"].isin(devices)]
+                return filtered_df
+            backend_filter.change(
+                filter_whisper_table,
+                inputs=[backend_filter, device_filter],
+                outputs=whisper_table
+            )
+            device_filter.change(
+                filter_whisper_table,
+                inputs=[backend_filter, device_filter],
+                outputs=whisper_table
+            )
+        with gr.TabItem("📈 Metrics", elem_id="od-benchmark-tab-table", id=2):
             gr.Markdown(METRICS_TAB_TEXT, elem_classes="markdown-text")
+        with gr.TabItem("✉️✨ Request a model here!", elem_id="od-benchmark-tab-table", id=3):
             with gr.Column():
                 gr.Markdown("# ✉️✨ Request results for a new model here!", elem_classes="markdown-text")
             with gr.Column():

constants.py CHANGED Viewed

@@ -116,8 +116,9 @@ For more details on the individual datasets and how models are evaluated to give
 LEADERBOARD_CSS = """
 #leaderboard-table th .header-content {
-		white-space: nowrap;
-		overflow: hidden;
-		text-overflow: ellipsis;
-	}
 """

 LEADERBOARD_CSS = """
 #leaderboard-table th .header-content {
+    white-space: nowrap;
+}
+#whisper-backends-tab th .header-content {
+    white-space: nowrap;
+}
 """

init.py CHANGED Viewed

@@ -4,8 +4,10 @@ from pathlib import Path
 from huggingface_hub import HfApi, Repository
 TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
-QUEUE_REPO = os.environ.get("QUEUE_REPO", None)
-QUEUE_PATH = os.environ.get("QUEUE_PATH", None)
 hf_api = HfApi(
     endpoint="https://huggingface.co",
@@ -29,6 +31,14 @@ def load_all_info_from_dataset_hub():
             repo_type="dataset",
         )
         eval_queue_repo.git_pull()
         # Local directory where dataset repo is cloned + folder with eval requests
         directory = QUEUE_PATH / EVAL_REQUESTS_PATH
@@ -38,10 +48,13 @@ def load_all_info_from_dataset_hub():
         csv_results = get_csv_with_results(QUEUE_PATH)
         if csv_results is None:
             passed = False
     if not passed:
         raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
-    return eval_queue_repo, requested_models, csv_results
 def upload_file(requested_model_name, path_or_fileobj):

 from huggingface_hub import HfApi, Repository
 TOKEN_HUB = os.environ.get("TOKEN_HUB", None)
+QUEUE_REPO = os.environ.get("QUEUE_REPO", "hf-audio/leaderboard-evals")
+QUEUE_REPO_WHISPER = os.environ.get("QUEUE_REPO_WHISPER", "Steveeeeeeen/whisper-leaderboard-evals")
+QUEUE_PATH = os.environ.get("QUEUE_PATH", "results")
+QUEUE_PATH_WHISPER = os.environ.get("QUEUE_PATH_WHISPER", "whisper-results")
 hf_api = HfApi(
     endpoint="https://huggingface.co",
             repo_type="dataset",
         )
         eval_queue_repo.git_pull()
+        whisper_eval_queue_repo = Repository(
+            local_dir=QUEUE_PATH_WHISPER,
+            clone_from=QUEUE_REPO_WHISPER,
+            use_auth_token=TOKEN_HUB,
+            repo_type="dataset",
+        )
+        whisper_eval_queue_repo.git_pull()
         # Local directory where dataset repo is cloned + folder with eval requests
         directory = QUEUE_PATH / EVAL_REQUESTS_PATH
         csv_results = get_csv_with_results(QUEUE_PATH)
         if csv_results is None:
             passed = False
+        whisper_csv_results = get_csv_with_results(QUEUE_PATH_WHISPER)
+        if whisper_csv_results is None:
+            passed = False
     if not passed:
         raise ValueError("No Hugging Face token provided. Skipping evaluation requests and results.")
+    return eval_queue_repo, requested_models, csv_results, whisper_eval_queue_repo, whisper_csv_results
 def upload_file(requested_model_name, path_or_fileobj):