Tristan Thrush committed on
Commit 79668b2 • 1 Parent(s): 679fbc2

added task filtering

Files changed (1)
  1. app.py +32 -18
app.py CHANGED
@@ -49,8 +49,8 @@ def parse_metrics_rows(meta, only_verified=False):
         for metric in result["metrics"]:
             name = metric["type"].lower().strip()
 
-            if name in ("model_id", "dataset", "split", "config"):
-                # Metrics are not allowed to be named "dataset", "split", "config".
+            if name in ("model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"):
+                # Metrics are not allowed to be named "dataset", "split", "config", "pipeline_tag".
                 continue
             value = parse_metric_value(metric.get("value", None))
             if value is None:
@@ -88,36 +88,36 @@ def get_data_wrapper():
                 if row is None:
                     continue
                 row["model_id"] = model_info.id
+                row["pipeline_tag"] = model_info.pipeline_tag
+                row["only_verified"] = False
                 data.append(row)
             for row in parse_metrics_rows(meta, only_verified=True):
                 if row is None:
                     continue
                 row["model_id"] = model_info.id
-                verified_data.append(row)
+                row["pipeline_tag"] = model_info.pipeline_tag
+                row["only_verified"] = True
+                data.append(row)
         dataframe = pd.DataFrame.from_records(data)
         dataframe.to_pickle("cache.pkl")
-        verified_dataframe = pd.DataFrame.from_records(verified_data)
-        verified_dataframe.to_pickle("verified_cache.pkl")
 
-    if exists("cache.pkl") and exists("verified_cache.pkl"):
+    if exists("cache.pkl"):
         # If we have saved the results previously, call an asynchronous process
         # to fetch the results and update the saved file. Don't make users wait
         # while we fetch the new results. Instead, display the old results for
         # now. The new results should be loaded when this method
         # is called again.
         dataframe = pd.read_pickle("cache.pkl")
-        verified_dataframe = pd.read_pickle("verified_cache.pkl")
-        t = threading.Thread(name='get_data procs', target=get_data)
+        t = threading.Thread(name="get_data procs", target=get_data)
         t.start()
     else:
         # We have to make the users wait during the first startup of this app.
         get_data()
         dataframe = pd.read_pickle("cache.pkl")
-        verified_dataframe = pd.read_pickle("verified_cache.pkl")
 
-    return dataframe, verified_dataframe
+    return dataframe
 
-dataframe, verified_dataframe = get_data_wrapper()
+dataframe = get_data_wrapper()
 
 st.markdown("# 🤗 Leaderboards")
 
@@ -125,17 +125,29 @@ only_verified_results = st.sidebar.checkbox(
     "Filter for Verified Results",
 )
 
-selectable_datasets = sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
+selectable_tasks = list(set(dataframe.pipeline_tag))
+if None in selectable_tasks:
+    selectable_tasks.remove(None)
+selectable_tasks.sort(key=lambda name: name.lower())
+
+task = st.sidebar.selectbox(
+    "Task",
+    ["-any-"] + selectable_tasks,
+)
 
-if only_verified_results:
-    dataframe = verified_dataframe
+if task != "-any-":
+    dataframe = dataframe[dataframe.pipeline_tag == task]
+
+selectable_datasets = sorted(list(set(dataframe.dataset.tolist())), key=lambda name: name.lower())
+if "" in selectable_datasets:
+    selectable_datasets.remove("")
 
 query_params = st.experimental_get_query_params()
 if "first_query_params" not in st.session_state:
     st.session_state.first_query_params = query_params
 first_query_params = st.session_state.first_query_params
 
-default_dataset = "common_voice"
+default_dataset = None
 if "dataset" in first_query_params:
     if len(first_query_params["dataset"]) > 0 and first_query_params["dataset"][0] in selectable_datasets:
         default_dataset = first_query_params["dataset"][0]
@@ -143,9 +155,11 @@ if "dataset" in first_query_params:
 dataset = st.sidebar.selectbox(
     "Dataset",
     selectable_datasets,
-    index=selectable_datasets.index(default_dataset),
+    index=selectable_datasets.index(default_dataset) if default_dataset in selectable_datasets else 0,
 )
 
+dataframe = dataframe[dataframe.only_verified == only_verified_results]
+
 st.experimental_set_query_params(**{"dataset": [dataset]})
 
 dataset_df = dataframe[dataframe.dataset == dataset]
@@ -167,7 +181,7 @@ if len(dataset_df) > 0:
     )
     dataset_df = dataset_df[dataset_df.split == split]
 
-    selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config"), dataset_df.columns))
+    selectable_metrics = list(filter(lambda column: column not in ("model_id", "dataset", "split", "config", "pipeline_tag", "only_verified"), dataset_df.columns))
 
     dataset_df = dataset_df.filter(["model_id"] + selectable_metrics)
     dataset_df = dataset_df.dropna(thresh=2)  # Want at least two non-na values (one for model_id and one for a metric).
@@ -217,5 +231,5 @@ if len(dataset_df) > 0:
 
 else:
     st.markdown(
-        "No data to display."
+        "No " + ("verified" if only_verified_results else "unverified") + " results to display. Try toggling the verified results filter."
     )
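
For readers skimming the diff: the commit collapses the old data / verified_data split into a single cached dataframe with extra "pipeline_tag" and "only_verified" columns, and the sidebar widgets then slice that one dataframe. The sketch below shows that filtering logic in isolation; the sample rows and the filter_results helper are hypothetical illustrations, not code from app.py.

```python
import pandas as pd

# Hypothetical sample rows in the same shape the commit builds in get_data():
# one combined dataframe carrying "pipeline_tag" and "only_verified" columns.
data = pd.DataFrame.from_records([
    {"model_id": "m1", "dataset": "common_voice", "split": "test",
     "pipeline_tag": "automatic-speech-recognition", "only_verified": False, "wer": 0.21},
    {"model_id": "m1", "dataset": "common_voice", "split": "test",
     "pipeline_tag": "automatic-speech-recognition", "only_verified": True, "wer": 0.23},
    {"model_id": "m2", "dataset": "squad", "split": "validation",
     "pipeline_tag": "question-answering", "only_verified": False, "f1": 88.5},
])

def filter_results(dataframe: pd.DataFrame, task: str, only_verified: bool) -> pd.DataFrame:
    """Apply the task filter and the verified-results filter, mirroring the diff above."""
    # "-any-" is the sentinel the sidebar uses for "no task filter".
    if task != "-any-":
        dataframe = dataframe[dataframe.pipeline_tag == task]
    # Rows are duplicated per verification status, so a boolean column selects one view.
    return dataframe[dataframe.only_verified == only_verified]

print(filter_results(data, task="automatic-speech-recognition", only_verified=False))
print(filter_results(data, task="-any-", only_verified=True))
```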