simpleoier committed
Commit 83fabc4
1 Parent(s): e885ab4

add ranking

Files changed (2):
  1. src/display/about.py +2 -2
  2. src/populate.py +28 -2
src/display/about.py CHANGED
@@ -10,7 +10,7 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
     task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
     task2 = Task("asr_eval3", "Bitrate", "Bitrate")
@@ -28,7 +28,7 @@ The leaderboard for discrete speech challenge (ASR Track) at Interspeech 2024. C
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
-The evaluation (static version) are conducted by the organizers only.
+The evaluation (static version) are conducted by the organizers only.
 
 We will accept submissions from the google form (see rules in the challenge website).
 
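For reference, each Tasks member above wraps a Task record whose three fields match the inline comment. A minimal sketch of the surrounding definitions, assuming the field names benchmark, metric, and col_name from the standard Hugging Face leaderboard template (they do not appear in this diff):

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # task_key in the results json, e.g. "asr_eval1" (assumed field name)
    metric: str     # metric_key in the results json, e.g. "EN_LibriSpeech" (assumed field name)
    col_name: str   # column name displayed in the leaderboard (assumed field name)


class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
    task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
    task2 = Task("asr_eval3", "Bitrate", "Bitrate")
```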
src/populate.py CHANGED
@@ -13,8 +13,34 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df.sort_values(by=[AutoEvalColumn.task2.name], ascending=False)
+    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False))
+    # df = df.sort_values(by=[AutoEvalColumn.task3.name], ascending=True)
+
+
+    df[AutoEvalColumn.task0.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task0.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task1.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task1.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task2.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task2.name].values)
+        ).squeeze()
+    )
+
+    en_cer_rank = df[AutoEvalColumn.task0.name].rank(method="min", numeric_only=True, ascending=True)
+    ml_cer_rank = df[AutoEvalColumn.task1.name].rank(method="min", numeric_only=True, ascending=True)
+    bitrate_rank = df[AutoEvalColumn.task2.name].rank(method="min", numeric_only=True, ascending=True)
+    df["Ranking"] = pd.Series((en_cer_rank + ml_cer_rank + bitrate_rank)/3)
+    df = df.sort_values(by=["Ranking", AutoEvalColumn.task1.name], ascending=True)
+    df["Rank"] = df.groupby("Precision").cumcount() + 1
+    df.pop("Ranking")
+
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
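The populate.py change is the substance of the commit: each model's overall Ranking is the mean of its per-metric ranks, the frame is sorted by that mean with ML_SUPERB CER as tie-breaker, and a final Rank is numbered within each Precision group. A self-contained sketch of the same logic on toy data; the column names and values are invented for illustration, standing in for the AutoEvalColumn fields, and the singleton-array cells are an assumption about what the raw_data to_dict() records produce:

```python
import numpy as np
import pandas as pd

# Toy leaderboard: each metric cell holds a singleton array (assumption
# about the shape of the collected results; lower is better everywhere).
df = pd.DataFrame({
    "EN_LibriSpeech CER": [np.array([2.1]), np.array([3.4]), np.array([2.1])],
    "ML_SUPERB CER": [np.array([15.0]), np.array([12.5]), np.array([14.0])],
    "Bitrate": [np.array([450.0]), np.array([600.0]), np.array([500.0])],
    "Precision": ["float32", "float32", "float16"],
})

# Step 1: stack the singleton arrays and squeeze them into plain floats,
# mirroring the np.stack(...).squeeze() calls in the diff.
for col in ["EN_LibriSpeech CER", "ML_SUPERB CER", "Bitrate"]:
    df[col] = pd.Series(np.stack(np.array(df[col].values)).squeeze())

# Step 2: rank each metric ascending; method="min" gives tied rows the
# same, lowest rank (the two 2.1 CERs both get rank 1 here).
en_cer_rank = df["EN_LibriSpeech CER"].rank(method="min", ascending=True)
ml_cer_rank = df["ML_SUPERB CER"].rank(method="min", ascending=True)
bitrate_rank = df["Bitrate"].rank(method="min", ascending=True)

# Step 3: average the per-metric ranks, sort by the average with the
# ML_SUPERB CER as tie-breaker, then number rows within each Precision group.
df["Ranking"] = (en_cer_rank + ml_cer_rank + bitrate_rank) / 3
df = df.sort_values(by=["Ranking", "ML_SUPERB CER"], ascending=True)
df["Rank"] = df.groupby("Precision").cumcount() + 1
df.pop("Ranking")  # drop the helper column; only Rank is kept for display
print(df)
```

Using method="min" means ties never penalize either model, and dropping the intermediate Ranking column after the sort keeps the displayed frame limited to the per-precision Rank.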