Spaces:
Runtime error
Runtime error
tidy up
Browse files
app.py
CHANGED
@@ -12,11 +12,11 @@ column_names = {
|
|
12 |
"MODEL": "Model",
|
13 |
"Avg. WER": "Average WER ⬇️",
|
14 |
"RTF": "RTF (1e-3) ⬇️",
|
15 |
-
"Common Voice WER": "Common Voice",
|
16 |
"D_AVG_CV_WER": "Delta AVG-CV WER",
|
17 |
}
|
18 |
|
19 |
-
# Skipping testing, just using the numbers computed in the original space
|
20 |
# eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
|
21 |
|
22 |
# if not csv_results.exists():
|
@@ -111,12 +111,10 @@ data = [
|
|
111 |
1.2, 26.41, 41.75]
|
112 |
]
|
113 |
|
114 |
-
# Noms de colonnes mis à jour
|
115 |
columns = [
|
116 |
-
"
|
117 |
]
|
118 |
|
119 |
-
# Création du DataFrame avec les noms de colonnes mis à jour
|
120 |
original_df = pd.DataFrame(data, columns=columns)
|
121 |
|
122 |
# Formats the columns
|
@@ -134,7 +132,8 @@ original_df.rename(columns=column_names, inplace=True)
|
|
134 |
original_df.sort_values(by='Common Voice', inplace=True)
|
135 |
|
136 |
# Compute delta between average WER and CV WER
|
137 |
-
original_df['Detla'] = original_df['
|
|
|
138 |
|
139 |
COLS = [c.name for c in fields(AutoEvalColumn)]
|
140 |
TYPES = [c.type for c in fields(AutoEvalColumn)]
|
@@ -194,6 +193,21 @@ def request_model(model_text, chbcoco2017):
|
|
194 |
with gr.Blocks() as demo:
|
195 |
gr.HTML(BANNER, elem_id="banner")
|
196 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
197 |
|
198 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
199 |
with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
|
|
|
12 |
"MODEL": "Model",
|
13 |
"Avg. WER": "Average WER ⬇️",
|
14 |
"RTF": "RTF (1e-3) ⬇️",
|
15 |
+
"Common Voice WER": "Common Voice WER ⬇️",
|
16 |
"D_AVG_CV_WER": "Delta AVG-CV WER",
|
17 |
}
|
18 |
|
19 |
+
# Skipping testing, just using the numbers computed in the original space for my sanity's sake
|
20 |
# eval_queue_repo, requested_models, csv_results = load_all_info_from_dataset_hub()
|
21 |
|
22 |
# if not csv_results.exists():
|
|
|
111 |
1.2, 26.41, 41.75]
|
112 |
]
|
113 |
|
|
|
114 |
columns = [
|
115 |
+
"Model", "RTF (1e-3) ⬇️", "Average WER ⬇️", "Common Voice WER ⬇️"
|
116 |
]
|
117 |
|
|
|
118 |
original_df = pd.DataFrame(data, columns=columns)
|
119 |
|
120 |
# Formats the columns
|
|
|
132 |
original_df.sort_values(by='Common Voice', inplace=True)
|
133 |
|
134 |
# Compute delta between average WER and CV WER
|
135 |
+
original_df['Detla Avg. C.V. WER'] = original_df['Average WER ⬇️'] - original_df['Common Voice WER ⬇️']
|
136 |
+
original_df['Detla Avg. C.V. WER'] = original_df['Detla Avg. C.V. WER'].apply(formatter)
|
137 |
|
138 |
COLS = [c.name for c in fields(AutoEvalColumn)]
|
139 |
TYPES = [c.type for c in fields(AutoEvalColumn)]
|
|
|
193 |
with gr.Blocks() as demo:
|
194 |
gr.HTML(BANNER, elem_id="banner")
|
195 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
|
196 |
+
CUSTOM_MESSAGE="""Legend:
|
197 |
+
This space is a fork of the original [hf-audio/open_asr_leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard).
|
198 |
+
It aims to show how the CommonVoice test set is large enough for most languages to give a relatively good approximation of the average WER/CER, but at a much lower computational cost.
|
199 |
+
#### Why is this useful?
|
200 |
+
Because it gives us a standardized test set for most languages, allowing us to programmatically choose a relatively good model for any CV-supported language.
|
201 |
+
|
202 |
+
`Model`, `RTF (1e-3) ⬇️` and `Average WER ⬇️` were reported from [hf-audio/open_asr_leaderboard](https://huggingface.co/spaces/hf-audio/open_asr_leaderboard), using the version from 7 Sept. 2023.
|
203 |
+
|
204 |
+
### Results
|
205 |
+
The CommonVoice test set gives a word error rate (WER) within less than 20 points of the average WER.
|
206 |
+
|
207 |
+
It's not good. Don't use only CommonVoice to choose the most adequate architecture.
|
208 |
+
But to quickly find a suitable ASR model for a large panel of languages in a programmatic fashion, it's close enough."""
|
209 |
+
gr.Markdown(CUSTOM_MESSAGE, elem_classes="markdown-text")
|
210 |
+
|
211 |
|
212 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
213 |
with gr.TabItem("🏅 Leaderboard", elem_id="od-benchmark-tab-table", id=0):
|