sanchit-gandhi (HF staff) committed
Commit cd05b03
1 Parent(s): dbf0e3c

Update app.py

Files changed (1)
  1. app.py +30 -78
app.py CHANGED
@@ -1,7 +1,6 @@
 import pandas as pd
 import streamlit as st
-from huggingface_hub import HfApi, hf_hub_download
-from huggingface_hub.repocard import metadata_load
+from huggingface_hub import HfApi
 from utils import ascending_metrics, metric_ranges
 import numpy as np
 from st_aggrid import AgGrid, GridOptionsBuilder, JsCode
@@ -80,9 +79,9 @@ def parse_metrics_rows(meta, only_verified=False):
             continue
         yield row
 
+
 @st.cache(ttl=0)
 def get_data_wrapper():
-
     def get_data(dataframe=None, verified_dataframe=None):
         data = []
         verified_data = []
@@ -126,107 +125,69 @@ def get_data_wrapper():
 
     return dataframe
 
+
 dataframe = get_data_wrapper()
 
-st.markdown("# 🤗 Leaderboards")
+st.markdown("# 🤗 Whisper Event: Final Leaderboard")
 
 query_params = st.experimental_get_query_params()
 if "first_query_params" not in st.session_state:
     st.session_state.first_query_params = query_params
 first_query_params = st.session_state.first_query_params
 
-default_task = first_query_params.get("task", [None])[0]
-default_only_verified = bool(int(first_query_params.get("only_verified", [0])[0]))
-print(default_only_verified)
-default_dataset = first_query_params.get("dataset", [None])[0]
-default_split = first_query_params.get("split", [None])[0]
 default_config = first_query_params.get("config", [None])[0]
 default_metric = first_query_params.get("metric", [None])[0]
 
-only_verified_results = st.sidebar.checkbox(
-    "Filter for Verified Results",
-    value=default_only_verified,
-    help="Select this checkbox if you want to see only results produced by the Hugging Face model evaluator, and no self-reported results."
-)
-
-selectable_tasks = list(set(dataframe.pipeline_tag))
-if None in selectable_tasks:
-    selectable_tasks.remove(None)
-selectable_tasks.sort(key=lambda name: name.lower())
-selectable_tasks = ["-any-"] + selectable_tasks
-
-task = st.sidebar.selectbox(
-    "Task",
-    selectable_tasks,
-    index=(selectable_tasks).index(default_task) if default_task in selectable_tasks else 0,
-    help="Filter the selectable datasets by task. Leave as \"-any-\" to see all selectable datasets."
-)
-
-if task != "-any-":
-    dataframe = dataframe[dataframe.pipeline_tag == task]
-
-selectable_datasets = ["-any-"] + sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
-if "" in selectable_datasets:
-    selectable_datasets.remove("")
-
-dataset = st.sidebar.selectbox(
-    "Dataset",
-    selectable_datasets,
-    index=selectable_datasets.index(default_dataset) if default_dataset in selectable_datasets else 0,
-    help="Select a dataset to see the leaderboard!"
-)
+only_verified_results = False
+task = "automatic-speech-recognition"
+dataset = "mozilla-foundation/common_voice_11_0"
+split = "test"
 
 dataframe = dataframe[dataframe.only_verified == only_verified_results]
 
-current_query_params = {"dataset": [dataset], "only_verified": [int(only_verified_results)], "task": [task]}
+current_query_params = {"dataset": [dataset], "only_verified": [int(only_verified_results)], "task": [task],
+                        "split": [split]}
 
 st.experimental_set_query_params(**current_query_params)
 
-if dataset != "-any-":
-    dataset_df = dataframe[dataframe.dataset == dataset]
-else:
-    dataset_df = dataframe
+dataset_df = dataframe[dataframe.dataset == dataset]
+dataset_df = dataset_df[dataset_df.split == split]
 
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 
+selectable_datasets = [dataset]
+dataset = st.sidebar.selectbox(
+    "Dataset",
+    selectable_datasets,
+    index=0,
+)
+
 if len(dataset_df) > 0:
     selectable_configs = list(set(dataset_df["config"]))
     selectable_configs.sort(key=lambda name: name.lower())
 
-    if "-unspecified-" in selectable_configs:
-        selectable_configs.remove("-unspecified-")
-        selectable_configs = ["-unspecified-"] + selectable_configs
+    selectable_configs.remove("-unspecified-")
 
     if dataset != "-any-":
        config = st.sidebar.selectbox(
-            "Config",
+            "Language",
            selectable_configs,
-            index=selectable_configs.index(default_config) if default_config in selectable_configs else 0,
-            help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
+            index=0,
+            help="Filter the results on the current leaderboard by language."
        )
        dataset_df = dataset_df[dataset_df.config == config]
 
-    selectable_splits = list(set(dataset_df["split"]))
-    selectable_splits.sort(key=lambda name: name.lower())
-
-    if "-unspecified-" in selectable_splits:
-        selectable_splits.remove("-unspecified-")
-        selectable_splits = ["-unspecified-"] + selectable_splits
-
+    selectable_splits = [split]
    split = st.sidebar.selectbox(
        "Split",
        selectable_splits,
-        index=selectable_splits.index(default_split) if default_split in selectable_splits else 0,
-        help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
+        index=0,
    )
 
-    current_query_params.update({"config": [config], "split": [split]})
-
-    st.experimental_set_query_params(**current_query_params)
-
-    dataset_df = dataset_df[dataset_df.split == split]
-
    not_selectable_metrics = ["model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"]
+    # also ignore irrelevant ASR metrics
+    not_selectable_metrics.extend(["wer_without_norm", "mer"])
+
    selectable_metrics = list(filter(lambda column: column not in not_selectable_metrics, dataset_df.columns))
 
    dataset_df = dataset_df.filter(["model_id"] + (["dataset"] if dataset == "-any-" else []) + selectable_metrics)
@@ -248,18 +209,9 @@ if len(dataset_df) > 0:
    )
 
    st.markdown(
-        "Want to beat the leaderboard? Don't see your model here? Simply request an automatic evaluation [here](https://huggingface.co/spaces/autoevaluate/model-evaluator)."
-    )
-
-    st.markdown(
-        "If you do not see your self-reported results here, ensure that your results are in the expected range for all metrics. E.g., accuracy is 0-1, not 0-100."
+        "Want to beat the leaderboard? Don't see your model here? Simply ..."
    )
 
-    if dataset == "-any-":
-        st.info(
-            "Note: you haven't chosen a dataset, so the leaderboard is showing the best scoring model for a random sample of the datasets available."
-        )
-
    # Make the default metric appear right after model names and dataset names
    cols = dataset_df.columns.tolist()
    cols.remove(sorting_metric)
@@ -313,4 +265,4 @@ if len(dataset_df) > 0:
 else:
    st.markdown(
        "No " + ("verified" if only_verified_results else "unverified") + " results to display. Try toggling the verified results filter."
-    )
+    )
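
For context, a minimal self-contained sketch (not part of this commit) of the query-parameter round trip the unchanged parts of app.py rely on: the URL parameters are read once, cached in st.session_state so reruns keep the original defaults, and the current selection is written back so the leaderboard view stays shareable. The language options here are placeholders, not values from the app.

import streamlit as st

# Read ?config=... style URL parameters; Streamlit returns a dict of lists.
query_params = st.experimental_get_query_params()

# Remember the parameters from the very first run so later reruns reuse them as defaults.
if "first_query_params" not in st.session_state:
    st.session_state.first_query_params = query_params
first_query_params = st.session_state.first_query_params

default_config = first_query_params.get("config", [None])[0]

# Hypothetical options, only to make the sketch runnable.
configs = ["hi", "fr", "es"]
config = st.sidebar.selectbox(
    "Language",
    configs,
    index=configs.index(default_config) if default_config in configs else 0,
)

# Write the selection back into the URL so the current view can be shared.
st.experimental_set_query_params(config=[config])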