Tristan Thrush committed on
Commit
bb28608
1 Parent(s): fe77dfe

added selection of verified results

Browse files
Files changed (1) hide show
  1. app.py +40 -13
app.py CHANGED
@@ -10,9 +10,12 @@ from os.path import exists
10
  import threading
11
 
12
 
13
- def get_model_ids():
14
  api = HfApi()
15
- models = api.list_models(filter="model-index")
 
 
 
16
  model_ids = [x.modelId for x in models]
17
  return model_ids
18
 
@@ -42,24 +45,39 @@ def parse_metric_value(value):
42
  return value
43
 
44
 
45
- def parse_metrics_rows(meta):
46
  if not isinstance(meta["model-index"], list) or len(meta["model-index"]) == 0 or "results" not in meta["model-index"][0]:
47
  return None
48
  for result in meta["model-index"][0]["results"]:
49
  if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
50
  continue
51
  dataset = result["dataset"]["type"]
52
- if "args" not in result["dataset"]:
53
- continue
54
- row = {"dataset": dataset}
 
 
55
  for metric in result["metrics"]:
56
  type = metric["type"].lower().strip()
 
 
 
57
  value = parse_metric_value(metric.get("value", None))
58
  if value is None:
59
  continue
60
- if type not in row or value < row[type]:
61
- # overwrite the metric if the new value is lower (e.g. with LM)
62
- row[type] = value
 
 
 
 
 
 
 
 
 
 
63
  yield row
64
 
65
  @st.cache(ttl=3600)
@@ -68,11 +86,12 @@ def get_data_wrapper():
68
  def get_data():
69
  data = []
70
  model_ids = get_model_ids()
 
71
  for model_id in tqdm(model_ids):
72
  meta = get_metadata(model_id)
73
  if meta is None:
74
  continue
75
- for row in parse_metrics_rows(meta):
76
  if row is None:
77
  continue
78
  row["model_id"] = model_id
@@ -108,6 +127,10 @@ if "dataset" in query_params:
108
  if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
109
  default_dataset = query_params["dataset"][0]
110
 
 
 
 
 
111
  dataset = st.sidebar.selectbox(
112
  "Dataset",
113
  selectable_datasets,
@@ -118,15 +141,19 @@ st.experimental_set_query_params(**{"dataset": [dataset]})
118
  dataset_df = dataframe[dataframe.dataset == dataset]
119
  dataset_df = dataset_df.dropna(axis="columns", how="all")
120
 
 
 
 
121
  selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset"), dataset_df.columns))
 
 
 
 
122
  sorting_metric = st.sidebar.radio(
123
  "Sorting Metric",
124
  selectable_metrics,
125
  )
126
 
127
- dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
128
- dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
129
-
130
  st.markdown(
131
  "Please click on the model's name to be redirected to its model card."
132
  )
10
  import threading
11
 
12
 
13
def get_model_ids(author=None):
    """Return the ids of Hub models that are tagged with ``model-index``.

    Args:
        author: Optional Hub user or organization name. When given, only
            models listed under that author are returned; when ``None``,
            no author restriction is applied.

    Returns:
        A list with the ``modelId`` of every model returned by
        ``HfApi.list_models``.
    """
    api = HfApi()
    if author is None:
        models = api.list_models(filter="model-index")
    else:
        # Forward the caller's value. This was previously hard-coded to
        # "autoevaluate", so any other author was silently ignored.
        models = api.list_models(filter="model-index", author=author)
    return [model.modelId for model in models]
21
 
45
  return value
46
 
47
 
48
def parse_metrics_rows(meta, from_autoeval=False):
    """Yield one leaderboard row per usable result in a model card's metadata.

    Args:
        meta: Parsed model card metadata. Must contain a ``"model-index"``
            key; only the first entry of that list is inspected.
        from_autoeval: Whether the model card comes from the autoevaluate
            organization. When true, only metrics flagged ``verified`` are
            copied into the row, and the row's ``"verified"`` field is True.

    Yields:
        A dict with the keys ``"dataset"``, ``"split"``, ``"config"`` and
        ``"verified"``, plus one key per accepted metric type (lower-cased,
        stripped) mapped to its best parsed value.

    Nothing is yielded when ``"model-index"`` is not a non-empty list or its
    first entry has no ``"results"``.
    """
    # Column names the row itself uses; a metric with one of these names would
    # overwrite row metadata, so such metrics are rejected.
    reserved_names = ("dataset", "split", "config", "verified")

    model_index = meta["model-index"]
    if not isinstance(model_index, list) or len(model_index) == 0 or "results" not in model_index[0]:
        return

    for result in model_index[0]["results"]:
        if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
            continue

        dataset_info = result["dataset"]
        row = {
            "dataset": dataset_info["type"],
            "split": None,
            "config": None,
            "verified": from_autoeval,
        }
        if "split" in dataset_info:
            row["split"] = dataset_info["split"]
        if "config" in dataset_info:
            row["config"] = dataset_info["config"]

        for metric in result["metrics"]:
            # Skip malformed metric entries instead of crashing, mirroring the
            # validation already applied to each result above.
            if not isinstance(metric, dict) or not isinstance(metric.get("type"), str):
                continue

            metric_type = metric["type"].lower().strip()
            if metric_type in reserved_names:
                # Metrics are not allowed to be named "dataset", "split",
                # "config", or "verified". (The check used to be inverted and
                # dropped every legitimate metric instead.)
                continue

            value = parse_metric_value(metric.get("value", None))
            if value is None:
                continue

            if metric_type in row:
                # Keep only the best value per metric type. `ascending_metrics`
                # is defined elsewhere in the file -- presumably the metrics
                # for which lower is better; confirm against its definition.
                if metric_type in ascending_metrics:
                    new_metric_better = value < row[metric_type]
                else:
                    new_metric_better = value > row[metric_type]
                if not new_metric_better:
                    continue

            # If the metric is from autoeval, only include it in the
            # leaderboard if it is a verified metric. Unverified metrics are
            # already included in the leaderboard from the unverified model
            # card.
            if from_autoeval and not ("verified" in metric and metric["verified"]):
                continue

            row[metric_type] = value

        yield row
82
 
83
  @st.cache(ttl=3600)
86
  def get_data():
87
  data = []
88
  model_ids = get_model_ids()
89
+ model_ids_from_autoeval = set(get_model_ids(author="autoevaluate"))
90
  for model_id in tqdm(model_ids):
91
  meta = get_metadata(model_id)
92
  if meta is None:
93
  continue
94
+ for row in parse_metrics_rows(meta, from_autoeval=model_id in model_ids_from_autoeval):
95
  if row is None:
96
  continue
97
  row["model_id"] = model_id
127
  if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
128
  default_dataset = query_params["dataset"][0]
129
 
130
+ only_verified_results = st.sidebar.checkbox(
131
+ "Filter for Verified Results",
132
+ )
133
+
134
  dataset = st.sidebar.selectbox(
135
  "Dataset",
136
  selectable_datasets,
141
  dataset_df = dataframe[dataframe.dataset == dataset]
142
  dataset_df = dataset_df.dropna(axis="columns", how="all")
143
 
144
+ if only_verified_results:
145
+ dataset_df = dataset_df[dataset_df["verified"]]
146
+
147
  selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset"), dataset_df.columns))
148
+
149
+ dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
150
+ dataset_df = dataset_df.dropna(thresh=2) # Want at least two non-na values (one for model_id and one for a metric).
151
+
152
  sorting_metric = st.sidebar.radio(
153
  "Sorting Metric",
154
  selectable_metrics,
155
  )
156
 
 
 
 
157
  st.markdown(
158
  "Please click on the model's name to be redirected to its model card."
159
  )