Spaces:

autoevaluate
/

leaderboards

Running

App Files Community

Tristan Thrush committed on Jul 11, 2022

Commit

5170076

•

1 Parent(s): 338a59f

added ability to not select a dataset

Browse files

Files changed (1) hide show

app.py +56 -23

app.py CHANGED Viewed

@@ -40,6 +40,8 @@ def parse_metrics_rows(meta, only_verified=False):
         if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
             continue
         dataset = result["dataset"]["type"]
         row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-"}
         if "split" in result["dataset"]:
             row["split"] = result["dataset"]["split"]
@@ -147,7 +149,7 @@ task = st.sidebar.selectbox(
 if task != "-any-":
     dataframe = dataframe[dataframe.pipeline_tag == task]
-selectable_datasets = sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
 if "" in selectable_datasets:
     selectable_datasets.remove("")
@@ -172,30 +174,37 @@ dataframe = dataframe[dataframe.only_verified == only_verified_results]
 st.experimental_set_query_params(**{"dataset": [dataset]})
-dataset_df = dataframe[dataframe.dataset == dataset]
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 if len(dataset_df) > 0:
     selectable_configs = list(set(dataset_df["config"]))
-    config = st.sidebar.selectbox(
-        "Config",
-        selectable_configs,
-        help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
-    )
-    dataset_df = dataset_df[dataset_df.config == config]
-    selectable_splits = list(set(dataset_df["split"]))
-    split = st.sidebar.selectbox(
-        "Split",
-        selectable_splits,
-        help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
-    )
-    dataset_df = dataset_df[dataset_df.split == split]
-    selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"), dataset_df.columns))
-    dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
     dataset_df = dataset_df.dropna(thresh=2)  # Want at least two non-na values (one for model_id and one for a metric).
     sorting_metric = st.sidebar.radio(
@@ -213,19 +222,38 @@ if len(dataset_df) > 0:
     )
     st.markdown(
-        "Note: if you do not see your self-reported results here, ensure that your results are in the expected range for all metrics. E.g., accuracy is 0-1, not 0-100."
     )
-    # Make the default metric appear right after model names
     cols = dataset_df.columns.tolist()
     cols.remove(sorting_metric)
-    cols = cols[:1] + [sorting_metric] + cols[1:]
     dataset_df = dataset_df[cols]
     # Sort the leaderboard, giving the sorting metric highest priority and then ordering by other metrics in the case of equal values.
-    dataset_df = dataset_df.sort_values(by=cols[1:], ascending=[metric in ascending_metrics for metric in cols[1:]])
     dataset_df = dataset_df.replace(np.nan, '-')
     # Make the leaderboard
     gb = GridOptionsBuilder.from_dataframe(dataset_df)
     gb.configure_default_column(sortable=False)
@@ -233,6 +261,11 @@ if len(dataset_df) > 0:
         "model_id",
         cellRenderer=JsCode('''function(params) {return '<a target="_blank" href="https://huggingface.co/'+params.value+'">'+params.value+'</a>'}'''),
     )
     for name in selectable_metrics:
         gb.configure_column(name, type=["numericColumn","numberColumnFilter","customNumericFormat"], precision=4, aggFunc='sum')

         if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
             continue
         dataset = result["dataset"]["type"]
+        if dataset == "":
+            continue
         row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-"}
         if "split" in result["dataset"]:
             row["split"] = result["dataset"]["split"]
 if task != "-any-":
     dataframe = dataframe[dataframe.pipeline_tag == task]
+selectable_datasets = ["-any-"] + sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
 if "" in selectable_datasets:
     selectable_datasets.remove("")
 st.experimental_set_query_params(**{"dataset": [dataset]})
+if dataset != "-any-":
+    dataset_df = dataframe[dataframe.dataset == dataset]
+else:
+    dataset_df = dataframe
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 if len(dataset_df) > 0:
     selectable_configs = list(set(dataset_df["config"]))
+    if dataset != "-any-":
+        config = st.sidebar.selectbox(
+            "Config",
+            selectable_configs,
+            help="Filter the results on the current leaderboard by the dataset config. Self-reported results might not report the config, which is why \"-unspecified-\" is an option."
+        )
+        dataset_df = dataset_df[dataset_df.config == config]
+        selectable_splits = list(set(dataset_df["split"]))
+        split = st.sidebar.selectbox(
+            "Split",
+            selectable_splits,
+            help="Filter the results on the current leaderboard by the dataset split. Self-reported results might not report the split, which is why \"-unspecified-\" is an option."
+        )
+        dataset_df = dataset_df[dataset_df.split == split]
+    not_selectable_metrics = ["model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"]
+    selectable_metrics = list(filter(lambda column: column not in not_selectable_metrics, dataset_df.columns))
+    dataset_df = dataset_df.filter(["model_id"] + (["dataset"] if dataset == "-any-" else []) + selectable_metrics)
     dataset_df = dataset_df.dropna(thresh=2)  # Want at least two non-na values (one for model_id and one for a metric).
     sorting_metric = st.sidebar.radio(
     )
     st.markdown(
+        "If you do not see your self-reported results here, ensure that your results are in the expected range for all metrics. E.g., accuracy is 0-1, not 0-100."
     )
+    if dataset == "-any-":
+        st.info(
+            "Note: you haven't chosen a dataset, so the leaderboard is showing the best scoring model for each dataset."
+        )
+    # Make the default metric appear right after model names and dataset names
     cols = dataset_df.columns.tolist()
     cols.remove(sorting_metric)
+    sorting_metric_index = 1 if dataset != "-any-" else 2
+    cols = cols[:sorting_metric_index] + [sorting_metric] + cols[sorting_metric_index:]
     dataset_df = dataset_df[cols]
     # Sort the leaderboard, giving the sorting metric highest priority and then ordering by other metrics in the case of equal values.
+    dataset_df = dataset_df.sort_values(by=cols[sorting_metric_index:], ascending=[metric in ascending_metrics for metric in cols[sorting_metric_index:]])
     dataset_df = dataset_df.replace(np.nan, '-')
+    # If dataset is "-any-", only show the best model for each dataset. Otherwise
+    # The leaderboard is way too long and doesn't give the users a feel for all of
+    # the datasets available for a task.
+    if dataset == "-any-":
+        filtered_dataset_df_dict = {column: [] for column in dataset_df.columns}
+        seen_datasets = set()
+        for _, row in dataset_df.iterrows():
+            if row["dataset"] not in seen_datasets:
+                for column in dataset_df.columns:
+                    filtered_dataset_df_dict[column].append(row[column])
+                seen_datasets.add(row["dataset"])
+        dataset_df = pd.DataFrame(filtered_dataset_df_dict)
     # Make the leaderboard
     gb = GridOptionsBuilder.from_dataframe(dataset_df)
     gb.configure_default_column(sortable=False)
         "model_id",
         cellRenderer=JsCode('''function(params) {return '<a target="_blank" href="https://huggingface.co/'+params.value+'">'+params.value+'</a>'}'''),
     )
+    if dataset == "-any-":
+        gb.configure_column(
+            "dataset",
+            cellRenderer=JsCode('''function(params) {return '<a target="_blank" href="https://huggingface.co/spaces/autoevaluate/leaderboards?dataset='+params.value+'">'+params.value+'</a>'}'''),
+        )
     for name in selectable_metrics:
         gb.configure_column(name, type=["numericColumn","numberColumnFilter","customNumericFormat"], precision=4, aggFunc='sum')