simpleoier committed
Commit 83fabc4
1 Parent(s): e885ab4

add ranking

Files changed (2):
  1. src/display/about.py +2 -2
  2. src/populate.py +28 -2
src/display/about.py CHANGED
@@ -10,7 +10,7 @@ class Task:
 
 # Init: to update with your specific keys
 class Tasks(Enum):
-    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
+    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
     task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
     task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
     task2 = Task("asr_eval3", "Bitrate", "Bitrate")
@@ -28,7 +28,7 @@ The leaderboard for discrete speech challenge (ASR Track) at Interspeech 2024. C
 LLM_BENCHMARKS_TEXT = f"""
 ## How it works
 
-The evaluation (static version) are conducted by the organizers only.
+The evaluation (static version) are conducted by the organizers only.
 
 We will accept submissions from the google form (see rules in the challenge website).
 
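For reference, each Tasks member above wraps a Task record whose three fields match the inline comment. A minimal sketch of the surrounding definitions, assuming the field names benchmark, metric, and col_name from the standard Hugging Face leaderboard template (they do not appear in this diff):

```python
from dataclasses import dataclass
from enum import Enum


@dataclass
class Task:
    benchmark: str  # task_key in the results json, e.g. "asr_eval1" (assumed field name)
    metric: str     # metric_key in the results json, e.g. "EN_LibriSpeech" (assumed field name)
    col_name: str   # column name displayed in the leaderboard (assumed field name)


class Tasks(Enum):
    # task_key in the json file, metric_key in the json file, name to display in the leaderboard
    task0 = Task("asr_eval1", "EN_LibriSpeech", "EN_LibriSpeech CER")
    task1 = Task("asr_eval2", "ML_SUPERB", "ML_SUPERB CER")
    task2 = Task("asr_eval3", "Bitrate", "Bitrate")
```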
src/populate.py CHANGED
@@ -13,8 +13,34 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     all_data_json = [v.to_dict() for v in raw_data]
 
     df = pd.DataFrame.from_records(all_data_json)
-    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
-    df = df.sort_values(by=[AutoEvalColumn.task2.name], ascending=False)
+    # df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False))
+    # df = df.sort_values(by=[AutoEvalColumn.task3.name], ascending=True)
+
+
+    df[AutoEvalColumn.task0.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task0.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task1.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task1.name].values)
+        ).squeeze()
+    )
+    df[AutoEvalColumn.task2.name] = pd.Series(
+        np.stack(
+            np.array(df[AutoEvalColumn.task2.name].values)
+        ).squeeze()
+    )
+
+    en_cer_rank = df[AutoEvalColumn.task0.name].rank(method="min", numeric_only=True, ascending=True)
+    ml_cer_rank = df[AutoEvalColumn.task1.name].rank(method="min", numeric_only=True, ascending=True)
+    bitrate_rank = df[AutoEvalColumn.task2.name].rank(method="min", numeric_only=True, ascending=True)
+    df["Ranking"] = pd.Series((en_cer_rank + ml_cer_rank + bitrate_rank)/3)
+    df = df.sort_values(by=["Ranking", AutoEvalColumn.task1.name], ascending=True)
+    df["Rank"] = df.groupby("Precision").cumcount() + 1
+    df.pop("Ranking")
+
     df = df[cols].round(decimals=2)
 
     # filter out if any of the benchmarks have not been produced
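The populate.py change is the substance of the commit: each model's overall Ranking is the mean of its per-metric ranks, the frame is sorted by that mean with ML_SUPERB CER as tie-breaker, and a final Rank is numbered within each Precision group. A self-contained sketch of the same logic on toy data; the column names and values are invented for illustration, standing in for the AutoEvalColumn fields, and the singleton-array cells are an assumption about what the raw_data to_dict() records produce:

```python
import numpy as np
import pandas as pd

# Toy leaderboard: each metric cell holds a singleton array (assumption
# about the shape of the collected results; lower is better everywhere).
df = pd.DataFrame({
    "EN_LibriSpeech CER": [np.array([2.1]), np.array([3.4]), np.array([2.1])],
    "ML_SUPERB CER": [np.array([15.0]), np.array([12.5]), np.array([14.0])],
    "Bitrate": [np.array([450.0]), np.array([600.0]), np.array([500.0])],
    "Precision": ["float32", "float32", "float16"],
})

# Step 1: stack the singleton arrays and squeeze them into plain floats,
# mirroring the np.stack(...).squeeze() calls in the diff.
for col in ["EN_LibriSpeech CER", "ML_SUPERB CER", "Bitrate"]:
    df[col] = pd.Series(np.stack(np.array(df[col].values)).squeeze())

# Step 2: rank each metric ascending; method="min" gives tied rows the
# same, lowest rank (the two 2.1 CERs both get rank 1 here).
en_cer_rank = df["EN_LibriSpeech CER"].rank(method="min", ascending=True)
ml_cer_rank = df["ML_SUPERB CER"].rank(method="min", ascending=True)
bitrate_rank = df["Bitrate"].rank(method="min", ascending=True)

# Step 3: average the per-metric ranks, sort by the average with the
# ML_SUPERB CER as tie-breaker, then number rows within each Precision group.
df["Ranking"] = (en_cer_rank + ml_cer_rank + bitrate_rank) / 3
df = df.sort_values(by=["Ranking", "ML_SUPERB CER"], ascending=True)
df["Rank"] = df.groupby("Precision").cumcount() + 1
df.pop("Ranking")  # drop the helper column; only Rank is kept for display
print(df)
```

Using method="min" means ties never penalize either model, and dropping the intermediate Ranking column after the sort keeps the displayed frame limited to the per-precision Rank.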