esc-bencher committed on
Commit d3cf2d5
1 Parent(s): 671c314

Update app.py

Files changed (1): app.py (+40, -38)

app.py CHANGED
@@ -16,21 +16,22 @@ SUBMISSION_URL = os.path.join(
 )
 
 TEST_SETS = [
-    "librispeech-clean",
-    "librispeech-other",
-    "common-voice-9",
-    "vox-populi",
-    "ted-lium",
-    "giga-speech",
-    "spgi-speech",
-    "earnings-22",
+    "ls-clean",
+    "ls-other",
+    "cv",
+    "vox",
+    "ted",
+    "giga",
+    "spgi",
+    "earnings",
     "ami",
 ]
 EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
-OPTIONAL_TEST_SETS = ["switch-board", "call-home", "chime-4"]
 
-CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
+OPTIONAL_TEST_SETS = ["swbd", "ch", "chime-4"]
+OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
 
+CSV_RESULTS_FILE = os.path.join(SUBMISSION_NAME, "results.csv")
 
 HF_TOKEN = os.environ.get("HF_TOKEN")
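Renaming the test sets to their short forms means submitted files now use the leaderboard display names directly, which is what allows the COLUMN_NAMES mapping to be deleted in the next hunk. For reference, a sketch of what the two derived file lists evaluate to under the new constants:

```python
TEST_SETS = ["ls-clean", "ls-other", "cv", "vox", "ted", "giga", "spgi", "earnings", "ami"]
EXPECTED_TEST_FILES = [f + ".txt" for f in TEST_SETS]
# -> ['ls-clean.txt', 'ls-other.txt', 'cv.txt', 'vox.txt', 'ted.txt',
#    'giga.txt', 'spgi.txt', 'earnings.txt', 'ami.txt']

OPTIONAL_TEST_SETS = ["swbd", "ch", "chime-4"]
OPTIONAL_TEST_FILES = [f + ".txt" for f in OPTIONAL_TEST_SETS]
# -> ['swbd.txt', 'ch.txt', 'chime-4.txt']
```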
@@ -60,22 +61,6 @@ all_submissions = [
     if os.path.isdir(os.path.join(SUBMISSION_NAME, folder)) and folder != ".git"
 ]
 
-
-COLUMN_NAMES = {
-    "librispeech-clean": "ls-clean",
-    "librispeech-other": "ls-other",
-    "common-voice-9": "cv9",
-    "vox-populi": "vox",
-    "ted-lium": "ted",
-    "giga-speech": "giga",
-    "spgi-speech": "spgi",
-    "earnings-22": "e22",
-    "ami": "ami",
-    "chime-4": "chime",
-    "switch-board": "swbd",
-    "call-home": "ch",
-}
-
 all_results = read_csv(CSV_RESULTS_FILE)
 
@@ -85,12 +70,10 @@ table = all_results.copy()
 esc_column = table.pop("esc-score")
 name_column = table.pop("name")
 table.insert(0, "esc-score", esc_column)
-# TODO: revert to scaling raw WER by 100 to retrieve % point values
-table = table.select_dtypes(exclude=['object', 'string'])  # * 100
+table = table.select_dtypes(exclude=['object', 'string'])
 table.insert(0, "name", name_column)
+table = table.sort_values(by=['esc-score'], ascending=True, ignore_index=True)
 table = table.round(2)
-table = table.rename(columns=COLUMN_NAMES)
-# start indexing from 1
 table.index = table.index + 1
 
 # Streamlit
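To see the reordering and ranking logic in isolation, here is a minimal pandas sketch of the same pattern (toy data; only the column names match the app, the numbers are made up):

```python
import pandas as pd

# toy leaderboard shaped like results.csv; the values are made up
table = pd.DataFrame(
    {"name": ["sys-b", "sys-a"], "ami": [15.0, 11.2], "esc-score": [12.3, 9.8]}
)

# move the score and name columns to the front
esc_column = table.pop("esc-score")
name_column = table.pop("name")
table.insert(0, "esc-score", esc_column)
table = table.select_dtypes(exclude=["object", "string"])  # drop any stray text columns
table.insert(0, "name", name_column)

# rank by ESC score: lower WER is better, so ascending order puts the best system first
table = table.sort_values(by=["esc-score"], ascending=True, ignore_index=True)
table = table.round(2)
table.index = table.index + 1  # display ranks starting from 1
print(table)
```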
@@ -100,11 +83,11 @@ st.markdown(
     f"""
     This is the leaderboard of the End-to-end Speech Challenge (ESC).
     Submitted systems are ranked by the **ESC Score** which is the average of
-    all non-optional datasets: {', '.join(list(COLUMN_NAMES.values())[:-3])}."""
+    all non-optional datasets: {", ".join(TEST_SETS)}. The optional datasets of swbd, ch and chime-4 do not contribute to the ESC score."""
 )
 
 # st.table(table)
-st.dataframe(table.style.format(subset=["esc-score", *[COLUMN_NAMES[k] for k in COLUMN_NAMES]], formatter="{:.1f}"))
+st.dataframe(table.style.format(subset=["esc-score", *TEST_SETS, *OPTIONAL_TEST_SETS], formatter="{:.1f}"))
 
 st.markdown(
     """
@@ -118,7 +101,14 @@ st.markdown(
     """
 )
 
-st.markdown("To submit to ESC, download the audio data for the nine mandatory ESC test sets from [esc-datasets](https://huggingface.co/datasets/esc-benchmark/esc-datasets). The test sets contain audio data only. Evaluate your system on the nine test sets by generating predictions for the unlabelled audio samples. For each test set, save the predictions in the order that the audio samples are provided in a .txt file, with one prediction per line. Name the .txt file according to the ESC test set names shown in the table (e.g. the predictions for LibriSpeech test-clean should be named ls-clean.txt). Once you have evaluated your system on all nine test sets, move the predictions into one folder and zip it. The name you assign to the folder will be the name that is shown on the table (e.g. whisper-aed.zip will be displayed as whisper-aed)."
+st.markdown(
+    """
+    ## Submitting to ESC
+    \n
+    To submit to ESC, download the audio data for the nine mandatory ESC test sets from [esc-datasets](https://huggingface.co/datasets/esc-benchmark/esc-datasets). The test sets contain audio data only. Evaluate your system on the nine test sets by generating predictions for the unlabelled audio samples. For each test set, save the predictions to a .txt file in the order that the audio samples are provided, with one prediction per line. Name the .txt file according to the ESC test set names shown in the table (e.g. the predictions for LibriSpeech test-clean should be named ls-clean.txt).
+    \n
+    Once you have evaluated your system on all nine test sets, move the predictions into one folder and zip it. The name you assign to the zipped folder will be the name that is shown on the table (e.g. whisper-aed.zip will be displayed as whisper-aed). Upload your zipped submissions for scoring and placement on the leaderboard.
+    """
 )
 
 # Using the "with" syntax
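For submitters, a hedged sketch of how a zip matching these instructions could be assembled; the folder name whisper-aed comes from the example above, and the placeholder transcripts are illustrative, not part of the app:

```python
import os
import shutil

TEST_SETS = ["ls-clean", "ls-other", "cv", "vox", "ted", "giga", "spgi", "earnings", "ami"]
submission_dir = "whisper-aed"  # the folder name becomes the leaderboard entry name
os.makedirs(submission_dir, exist_ok=True)

for name in TEST_SETS:
    # placeholder transcripts; in practice, one prediction per audio sample,
    # in the same order the samples are provided
    predictions = [f"placeholder transcript {i}" for i in range(3)]
    with open(os.path.join(submission_dir, name + ".txt"), "w") as f:
        f.write("\n".join(predictions))

# produces whisper-aed.zip, which unzips to whisper-aed/ls-clean.txt, ..., whisper-aed/ami.txt
shutil.make_archive(submission_dir, "zip", root_dir=".", base_dir=submission_dir)
```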
@@ -138,9 +128,10 @@ if submit_button:
 
     with st.spinner(f"Computing ESC Score for {submission}..."):
         results = {"name": submission}
-        submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))
+        all_submitted_files = os.listdir(os.path.join(SUBMISSION_NAME, submission))
 
-        submitted_files = [f for f in submitted_files if f in EXPECTED_TEST_FILES]
+        submitted_files = [f for f in all_submitted_files if f in EXPECTED_TEST_FILES]
+        submitted_optional_files = [f for f in all_submitted_files if f in OPTIONAL_TEST_FILES]
 
         if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
             raise ValueError(
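A small self-contained sketch of the partition-and-validate step (file names and the error message are made up; the lists are trimmed versions of the constants above):

```python
EXPECTED_TEST_FILES = ["ls-clean.txt", "ls-other.txt", "ami.txt"]  # trimmed for brevity
OPTIONAL_TEST_FILES = ["swbd.txt", "ch.txt", "chime-4.txt"]

all_submitted_files = ["ami.txt", "ls-clean.txt", "swbd.txt", "notes.md"]

# partition the upload into mandatory and optional predictions
submitted_files = [f for f in all_submitted_files if f in EXPECTED_TEST_FILES]
submitted_optional_files = [f for f in all_submitted_files if f in OPTIONAL_TEST_FILES]

# sorted() comparison ignores upload order; here ls-other.txt is missing, so this raises
if sorted(EXPECTED_TEST_FILES) != sorted(submitted_files):
    raise ValueError("Missing mandatory test-set predictions")
```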
@@ -152,16 +143,27 @@ if submit_button:
             pred_file = os.path.join(SUBMISSION_NAME, submission, file)
 
             wer = compute_wer(pred_file, ref_file)
-            results[file.split(".")[0]] = str(wer)
+            results[file.split(".")[0]] = round(100 * wer, 2)
+
+        for file in submitted_optional_files:
+            ref_file = os.path.join(REFERENCE_NAME, file)
+            pred_file = os.path.join(SUBMISSION_NAME, submission, file)
+
+            wer = compute_wer(pred_file, ref_file)
+            results[file.split(".")[0]] = round(100 * wer, 2)
 
+        # ESC score is computed over the mandatory test sets only
         wer_values = [float(results[t]) for t in TEST_SETS]
+        # first average over LS test sets
+        wer_values = [sum(wer_values[:2]) / 2, *wer_values[2:]]
+        # then macro-average over ESC test sets
         all_wer = sum(wer_values) / len(wer_values)
 
-        results["esc-score"] = all_wer
+        results["esc-score"] = round(all_wer, 2)
         all_results = all_results.append(results, ignore_index=True)
 
         # save and upload new evaluated results
-        all_results.to_csv(CSV_RESULTS_FILE)
+        all_results.to_csv(CSV_RESULTS_FILE, index=False)
         commit_url = submission_repo.push_to_hub()
 
         st.success('Please refresh this space (CTRL+R) to see your result')
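To make the new aggregation concrete, a worked example with made-up WER percentages: the two LibriSpeech sets are first collapsed into a single average, so LibriSpeech contributes one eighth of the ESC score rather than two ninths.

```python
# made-up WERs (%) in TEST_SETS order:
# ls-clean, ls-other, cv, vox, ted, giga, spgi, earnings, ami
wer_values = [2.0, 4.0, 10.0, 8.0, 6.0, 12.0, 5.0, 14.0, 16.0]

# first average over the two LibriSpeech test sets
wer_values = [sum(wer_values[:2]) / 2, *wer_values[2:]]
# -> [3.0, 10.0, 8.0, 6.0, 12.0, 5.0, 14.0, 16.0]

# then macro-average over the eight remaining values
esc_score = sum(wer_values) / len(wer_values)
print(round(esc_score, 2))  # 9.25
```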
 
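One portability caveat worth flagging: `DataFrame.append`, used unchanged in the last hunk, was deprecated in pandas 1.4 and removed in 2.0, so the row-append step only runs on older pandas. A drop-in sketch with `pd.concat`, assuming `results` is the flat dict built in the loop above:

```python
import pandas as pd

all_results = pd.DataFrame({"name": ["sys-a"], "esc-score": [9.25]})  # toy existing table
results = {"name": "sys-b", "esc-score": 10.5}  # one new leaderboard row

# equivalent to the pandas-2.0-removed all_results.append(results, ignore_index=True)
all_results = pd.concat([all_results, pd.DataFrame([results])], ignore_index=True)
print(all_results)
```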