Tristan Thrush committed on
Commit
5170076
1 Parent(s): 338a59f

added ability to not select a dataset

Browse files
Files changed (1) hide show
  1. app.py +56 -23
app.py CHANGED
@@ -40,6 +40,8 @@ def parse_metrics_rows(meta, only_verified=False):
40
  if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
41
  continue
42
  dataset = result["dataset"]["type"]
 
 
43
  row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-"}
44
  if "split" in result["dataset"]:
45
  row["split"] = result["dataset"]["split"]
@@ -147,7 +149,7 @@ task = st.sidebar.selectbox(
147
  if task != "-any-":
148
  dataframe = dataframe[dataframe.pipeline_tag == task]
149
 
150
- selectable_datasets = sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
151
  if "" in selectable_datasets:
152
  selectable_datasets.remove("")
153
 
@@ -172,30 +174,37 @@ dataframe = dataframe[dataframe.only_verified == only_verified_results]
172
 
173
  st.experimental_set_query_params(**{"dataset": [dataset]})
174
 
175
- dataset_df = dataframe[dataframe.dataset == dataset]
 
 
 
 
176
  dataset_df = dataset_df.dropna(axis="columns", how="all")
177
 
178
  if len(dataset_df) > 0:
179
 
180
  selectable_configs = list(set(dataset_df["config"]))
181
- config = st.sidebar.selectbox(
182
- "Config",
183
- selectable_configs,
184
- help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
185
- )
186
- dataset_df = dataset_df[dataset_df.config == config]
187
 
188
- selectable_splits = list(set(dataset_df["split"]))
189
- split = st.sidebar.selectbox(
190
- "Split",
191
- selectable_splits,
192
- help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
193
- )
194
- dataset_df = dataset_df[dataset_df.split == split]
195
-
196
- selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"), dataset_df.columns))
197
-
198
- dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
 
 
 
 
 
 
 
 
 
199
  dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
200
 
201
  sorting_metric = st.sidebar.radio(
@@ -213,19 +222,38 @@ if len(dataset_df) > 0:
213
  )
214
 
215
  st.markdown(
216
- "Note: if you do not see your self-reported results here, ensure that your results are in the expected range for all metrics. E.g., accuracy is 0-1, not 0-100."
217
  )
218
 
219
- # Make the default metric appear right after model names
 
 
 
 
 
220
  cols = dataset_df.columns.tolist()
221
  cols.remove(sorting_metric)
222
- cols = cols[:1] + [sorting_metric] + cols[1:]
 
223
  dataset_df = dataset_df[cols]
224
 
225
  # Sort the leaderboard, giving the sorting metric highest priority and then ordering by other metrics in the case of equal values.
226
- dataset_df = dataset_df.sort_values(by=cols[1:], ascending=[metric in ascending_metrics for metric in cols[1:]])
227
  dataset_df = dataset_df.replace(np.nan, '-')
228
 
 
 
 
 
 
 
 
 
 
 
 
 
 
229
  # Make the leaderboard
230
  gb = GridOptionsBuilder.from_dataframe(dataset_df)
231
  gb.configure_default_column(sortable=False)
@@ -233,6 +261,11 @@ if len(dataset_df) > 0:
233
  "model_id",
234
  cellRenderer=JsCode('''function(params) {return '<a target="_blank" href="https://huggingface.co/'+params.value+'">'+params.value+'</a>'}'''),
235
  )
 
 
 
 
 
236
  for name in selectable_metrics:
237
  gb.configure_column(name, type=["numericColumn","numberColumnFilter","customNumericFormat"], precision=4, aggFunc='sum')
238
 
40
  if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
41
  continue
42
  dataset = result["dataset"]["type"]
43
+ if dataset == "":
44
+ continue
45
  row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-"}
46
  if "split" in result["dataset"]:
47
  row["split"] = result["dataset"]["split"]
149
  if task != "-any-":
150
  dataframe = dataframe[dataframe.pipeline_tag == task]
151
 
152
+ selectable_datasets = ["-any-"] + sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
153
  if "" in selectable_datasets:
154
  selectable_datasets.remove("")
155
 
174
 
175
  st.experimental_set_query_params(**{"dataset": [dataset]})
176
 
177
+ if dataset != "-any-":
178
+ dataset_df = dataframe[dataframe.dataset == dataset]
179
+ else:
180
+ dataset_df = dataframe
181
+
182
  dataset_df = dataset_df.dropna(axis="columns", how="all")
183
 
184
  if len(dataset_df) > 0:
185
 
186
  selectable_configs = list(set(dataset_df["config"]))
 
 
 
 
 
 
187
 
188
+ if dataset != "-any-":
189
+ config = st.sidebar.selectbox(
190
+ "Config",
191
+ selectable_configs,
192
+ help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
193
+ )
194
+ dataset_df = dataset_df[dataset_df.config == config]
195
+
196
+ selectable_splits = list(set(dataset_df["split"]))
197
+ split = st.sidebar.selectbox(
198
+ "Split",
199
+ selectable_splits,
200
+ help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
201
+ )
202
+ dataset_df = dataset_df[dataset_df.split == split]
203
+
204
+ not_selectable_metrics = ["model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"]
205
+ selectable_metrics = list(filter(lambda column: column not in not_selectable_metrics, dataset_df.columns))
206
+
207
+ dataset_df = dataset_df.filter(["model_id"] + (["dataset"] if dataset == "-any-" else []) + selectable_metrics)
208
  dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
209
 
210
  sorting_metric = st.sidebar.radio(
222
  )
223
 
224
  st.markdown(
225
+ "If you do not see your self-reported results here, ensure that your results are in the expected range for all metrics. E.g., accuracy is 0-1, not 0-100."
226
  )
227
 
228
+ if dataset == "-any-":
229
+ st.info(
230
+ "Note: you haven't chosen a dataset, so the leaderboard is showing the best scoring model for each dataset."
231
+ )
232
+
233
+ # Make the default metric appear right after model names and dataset names
234
  cols = dataset_df.columns.tolist()
235
  cols.remove(sorting_metric)
236
+ sorting_metric_index = 1 if dataset != "-any-" else 2
237
+ cols = cols[:sorting_metric_index] + [sorting_metric] + cols[sorting_metric_index:]
238
  dataset_df = dataset_df[cols]
239
 
240
  # Sort the leaderboard, giving the sorting metric highest priority and then ordering by other metrics in the case of equal values.
241
+ dataset_df = dataset_df.sort_values(by=cols[sorting_metric_index:], ascending=[metric in ascending_metrics for metric in cols[sorting_metric_index:]])
242
  dataset_df = dataset_df.replace(np.nan, '-')
243
 
244
+ # If dataset is "-any-", only show the best model for each dataset. Otherwise
245
+ # the leaderboard is way too long and doesn't give the users a feel for all of
246
+ # the datasets available for a task.
247
+ if dataset == "-any-":
248
+ filtered_dataset_df_dict = {column: [] for column in dataset_df.columns}
249
+ seen_datasets = set()
250
+ for _, row in dataset_df.iterrows():
251
+ if row["dataset"] not in seen_datasets:
252
+ for column in dataset_df.columns:
253
+ filtered_dataset_df_dict[column].append(row[column])
254
+ seen_datasets.add(row["dataset"])
255
+ dataset_df = pd.DataFrame(filtered_dataset_df_dict)
256
+
257
  # Make the leaderboard
258
  gb = GridOptionsBuilder.from_dataframe(dataset_df)
259
  gb.configure_default_column(sortable=False)
261
  "model_id",
262
  cellRenderer=JsCode('''function(params) {return '<a target="_blank" href="https://huggingface.co/'+params.value+'">'+params.value+'</a>'}'''),
263
  )
264
+ if dataset == "-any-":
265
+ gb.configure_column(
266
+ "dataset",
267
+ cellRenderer=JsCode('''function(params) {return '<a target="_blank" href="https://huggingface.co/spaces/autoevaluate/leaderboards?dataset='+params.value+'">'+params.value+'</a>'}'''),
268
+ )
269
  for name in selectable_metrics:
270
  gb.configure_column(name, type=["numericColumn","numberColumnFilter","customNumericFormat"], precision=4, aggFunc='sum')
271