wasertech committed on
Commit
60ea0fc
1 Parent(s): 1c0bd71
Files changed (1)
  1. app.py +20 -6
app.py CHANGED
@@ -12,11 +12,11 @@ column_names = {
     "MODEL": "Model",
     "Avg. WER": "Average WER ⬇️",
     "RTF": "RTF (1e-3) ⬇️",
-    "Common Voice WER": "Common Voice",
+    "Common Voice WER": "Common Voice WER ⬇️",
     "D_AVG_CV_WER": "Delta AVG-CV WER",
 }
 
-# Skipping testings just uing the numbers computed in the original space.
+# Skipping testing, just using the numbers computed in the original space, for my sanity's sake.
 # eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
 
 # if not csv_results.exists():
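`column_names` is the display-name mapping applied further down with `DataFrame.rename`; keys that don't match a column are silently ignored, so the rename is safe whether or not the columns already carry their display labels. A minimal sketch of that behavior (toy frame and values, not leaderboard data):

```python
import pandas as pd

# Display-name mapping, as in app.py after this commit.
column_names = {
    "MODEL": "Model",
    "Avg. WER": "Average WER ⬇️",
    "RTF": "RTF (1e-3) ⬇️",
    "Common Voice WER": "Common Voice WER ⬇️",
    "D_AVG_CV_WER": "Delta AVG-CV WER",
}

# Toy frame using the raw keys; the numbers are made up.
df = pd.DataFrame({"MODEL": ["some-asr-model"], "Avg. WER": [10.0], "Common Voice WER": [12.5]})

# Unmatched keys ("RTF", "D_AVG_CV_WER") are ignored; the rest are renamed.
df = df.rename(columns=column_names)
print(list(df.columns))  # ['Model', 'Average WER ⬇️', 'Common Voice WER ⬇️']
```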
@@ -111,12 +111,10 @@ data = [
     1.2, 26.41, 41.75]
 ]
 
-# Updated column names
 columns = [
-    "model", "RTF", "Avrg. WER", "Common Voice"
+    "Model", "RTF (1e-3) ⬇️", "Average WER ⬇️", "Common Voice WER ⬇️"
 ]
 
-# Create the DataFrame with the updated column names
 original_df = pd.DataFrame(data, columns=columns)
 
 # Formats the columns
@@ -134,7 +132,8 @@ original_df.rename(columns=column_names, inplace=True)
-original_df.sort_values(by='Common Voice', inplace=True)
+original_df.sort_values(by='Common Voice WER ⬇️', inplace=True)
 
 # Compute delta between average WER and CV WER
-original_df['Detla'] = original_df['Avrg. WER'] - original_df['Common Voice']
+original_df['Delta Avg. C.V. WER'] = original_df['Average WER ⬇️'] - original_df['Common Voice WER ⬇️']
+original_df['Delta Avg. C.V. WER'] = original_df['Delta Avg. C.V. WER'].apply(formatter)
 
 COLS = [c.name for c in fields(AutoEvalColumn)]
 TYPES = [c.type for c in fields(AutoEvalColumn)]
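The delta added in this hunk is plain column arithmetic followed by a display pass. A self-contained sketch, with a stand-in `formatter` (the real one is defined elsewhere in app.py; its exact behavior is assumed here):

```python
import pandas as pd

def formatter(x: float) -> str:
    # Stand-in for app.py's formatter; assumed to round to two decimals.
    return f"{x:.2f}"

df = pd.DataFrame({
    "Average WER ⬇️": [8.06, 26.41],       # toy values
    "Common Voice WER ⬇️": [9.50, 41.75],  # toy values
})

# Same two steps as the + lines above: subtract, then format for display.
df["Delta Avg. C.V. WER"] = df["Average WER ⬇️"] - df["Common Voice WER ⬇️"]
df["Delta Avg. C.V. WER"] = df["Delta Avg. C.V. WER"].apply(formatter)
print(df["Delta Avg. C.V. WER"].tolist())  # ['-1.44', '-15.34']
```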
@@ -194,6 +193,21 @@ def request_model(model_text, chbcoco2017):
 with gr.Blocks() as demo:
     gr.HTML(BANNER, elem_id="banner")
     gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
+    CUSTOM_MESSAGE = """Legend:
+This space is a fork of the original [hf-audio/open_asr_leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard).
+It aims to show that the CommonVoice test set is large enough, for most languages, to give a relatively good approximation of the average WER/CER at a much lower computational cost.
+#### Why is this useful?
+Because it gives us a standardized test set for most languages, allowing us to programmatically choose a relatively good model for any CV-supported language.
+
+`Model`, `RTF (1e-3) ⬇️` and `Average WER ⬇️` were reported from [hf-audio/open_asr_leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard) on 9 Sept. 2023, using the version from 7 Sept. 2023.
+
+### Results
+The CommonVoice test set gives a word error rate (WER) within less than 20 points of the average WER.
+
+That's not great. Don't use CommonVoice alone to choose the most adequate architecture.
+But to quickly find a suitable ASR model for a large panel of languages in a programmatic fashion, it's close enough."""
+    gr.Markdown(CUSTOM_MESSAGE, elem_classes="markdown-text")
+
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
        with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
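The legend's "programmatically choose" claim boils down to a sort-and-filter over the leaderboard frame. A hedged sketch of what that could look like (the helper, the RTF budget, and the assumption of still-numeric columns are illustrative, not part of app.py):

```python
import pandas as pd

def pick_model(df: pd.DataFrame, max_rtf: float = 5.0) -> str:
    """Illustrative helper, not in app.py: among models whose RTF fits the
    budget, return the one with the lowest Common Voice WER. Assumes the
    WER/RTF columns are still numeric (i.e. before display formatting)."""
    fast_enough = df[df["RTF (1e-3) ⬇️"] <= max_rtf]
    best = fast_enough.sort_values("Common Voice WER ⬇️").iloc[0]
    return best["Model"]

# Toy usage with made-up rows:
toy = pd.DataFrame({
    "Model": ["model-a", "model-b"],
    "RTF (1e-3) ⬇️": [1.2, 9.0],
    "Common Voice WER ⬇️": [12.5, 9.0],
})
print(pick_model(toy))  # 'model-a' — model-b is more accurate but over budget
```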
 