Tristan Thrush committed on
Commit 23ca923 • 1 Parent(s): 30f749f

removed requirement to be from autoeval org

Files changed (1)
  1. app.py +31 -30
app.py CHANGED
@@ -45,30 +45,24 @@ def parse_metric_value(value):
     return value
 
 
-def parse_metrics_rows(meta, from_autoeval=False):
+def parse_metrics_rows(meta, only_verified=False):
     if not isinstance(meta["model-index"], list) or len(meta["model-index"]) == 0 or "results" not in meta["model-index"][0]:
         return None
     for result in meta["model-index"][0]["results"]:
         if not isinstance(result, dict) or "dataset" not in result or "metrics" not in result or "type" not in result["dataset"]:
             continue
         dataset = result["dataset"]["type"]
-        row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-", "verified": from_autoeval}
+        row = {"dataset": dataset, "split": "-unspecified-", "config": "-unspecified-"}
         if "split" in result["dataset"]:
             row["split"] = result["dataset"]["split"]
         if "config" in result["dataset"]:
             row["config"] = result["dataset"]["config"]
         no_results = True
         for metric in result["metrics"]:
+            name = metric["type"].lower().strip()
 
-            # On autoeval cards, name is consistent. name seems less consistent than
-            # type for self-reported results on user model cards though.
-            if from_autoeval:
-                name = metric["name"].lower().strip()
-            else:
-                name = metric["type"].lower().strip()
-
-            if name in ("model_id", "dataset", "split", "config", "verified"):
-                # Metrics are not allowed to be named "dataset", "split", "config", or "verified".
+            if name in ("model_id", "dataset", "split", "config"):
+                # Metrics are not allowed to be named "dataset", "split", "config".
                 continue
             value = parse_metric_value(metric.get("value", None))
             if value is None:
@@ -78,10 +72,7 @@ def parse_metrics_rows(meta, from_autoeval=False):
             if name not in row or new_metric_better:
                 # overwrite the metric if the new value is better.
 
-                if from_autoeval:
-                    # if the metric is from autoeval, only include it in the leaderboard if
-                    # it is a verified metric. Unverified metrics are already included
-                    # in the leaderboard from the unverified model card.
+                if only_verified:
                     if "verified" in metric and metric["verified"]:
                         no_results = False
                         row[name] = value
@@ -97,52 +88,65 @@ def get_data_wrapper():
 
     def get_data():
         data = []
-        model_ids = get_model_ids()
+        verified_data = []
+        model_ids = get_model_ids()[:100]
         model_ids_from_autoeval = set(get_model_ids(author="autoevaluate"))
         for model_id in tqdm(model_ids):
            meta = get_metadata(model_id)
            if meta is None:
                continue
-           for row in parse_metrics_rows(meta, from_autoeval=model_id in model_ids_from_autoeval):
+           for row in parse_metrics_rows(meta):
                if row is None:
                    continue
                row["model_id"] = model_id
                data.append(row)
+           for row in parse_metrics_rows(meta, only_verified=True):
+               if row is None:
+                   continue
+               row["model_id"] = model_id
+               verified_data.append(row)
        dataframe = pd.DataFrame.from_records(data)
        dataframe.to_pickle("cache.pkl")
+       verified_dataframe = pd.DataFrame.from_records(verified_data)
+       verified_dataframe.to_pickle("verified_cache.pkl")
 
-    if exists("cache.pkl"):
+    if exists("cache.pkl") and exists("verified_cache.pkl"):
        # If we have saved the results previously, call an asynchronous process
        # to fetch the results and update the saved file. Don't make users wait
        # while we fetch the new results. Instead, display the old results for
        # now. The new results should be loaded when this method
        # is called again.
        dataframe = pd.read_pickle("cache.pkl")
+       verified_dataframe = pd.read_pickle("verified_cache.pkl")
        t = threading.Thread(name='get_data procs', target=get_data)
        t.start()
    else:
        # We have to make the users wait during the first startup of this app.
        get_data()
        dataframe = pd.read_pickle("cache.pkl")
+       verified_dataframe = pd.read_pickle("verified_cache.pkl")
 
-    return dataframe
-
-dataframe = get_data_wrapper()
+    return dataframe, verified_dataframe
 
-selectable_datasets = list(set(dataframe.dataset.tolist()))
+dataframe, verified_dataframe = get_data_wrapper()
 
 st.markdown("# 🤗 Leaderboards")
 
+only_verified_results = st.sidebar.checkbox(
+    "Filter for Verified Results",
+)
+
+if only_verified_results:
+    dataframe = verified_dataframe
+
+selectable_datasets = list(set(dataframe.dataset.tolist()))
+
 query_params = st.experimental_get_query_params()
 default_dataset = "common_voice"
 if "dataset" in query_params:
     if len(query_params["dataset"]) > 0 and query_params["dataset"][0] in selectable_datasets:
         default_dataset = query_params["dataset"][0]
 
-only_verified_results = st.sidebar.checkbox(
-    "Filter for Verified Results",
-)
-
 dataset = st.sidebar.selectbox(
     "Dataset",
     selectable_datasets,
@@ -154,9 +158,6 @@ st.experimental_set_query_params(**{"dataset": [dataset]})
 dataset_df = dataframe[dataframe.dataset == dataset]
 dataset_df = dataset_df.dropna(axis="columns", how="all")
 
-if only_verified_results:
-    dataset_df = dataset_df[dataset_df["verified"]]
-
 selectable_configs = list(set(dataset_df["config"]))
 config = st.sidebar.selectbox(
     "Config",
@@ -171,7 +172,7 @@ split = st.sidebar.selectbox(
 )
 dataset_df = dataset_df[dataset_df.split == split]
 
-selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config", "verified"), dataset_df.columns))
+selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config"), dataset_df.columns))
 
 dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
 dataset_df = dataset_df.dropna(thresh=2)  # Want at least two non-na values (one for model_id and one for a metric).
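
For context, here is a minimal sketch of the kind of model-index card metadata that parse_metrics_rows consumes after this change. The model card contents below are invented for illustration; only the field names the parser reads (dataset type/config/split, metric type/value/verified) come from the code in the diff.

# Illustrative only: a metadata dict shaped like meta["model-index"][0]["results"],
# with made-up dataset and metric values.
meta = {
    "model-index": [
        {
            "results": [
                {
                    "dataset": {
                        "type": "common_voice",  # becomes row["dataset"]
                        "config": "en",          # becomes row["config"]
                        "split": "test",         # becomes row["split"]
                    },
                    "metrics": [
                        # In the only_verified=True pass, only metrics carrying
                        # "verified": True are recorded into the row.
                        {"type": "wer", "value": 23.1, "verified": True},
                        {"type": "cer", "value": 7.4},  # self-reported, unverified
                    ],
                }
            ]
        }
    ]
}

Under the new flow, every card is parsed twice: once normally (rows go to cache.pkl) and once with only_verified=True (rows go to verified_cache.pkl), so only the verified "wer" entry above would reach the verified cache. The "Filter for Verified Results" checkbox then simply swaps in the verified dataframe instead of filtering a per-row "verified" column.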