TemryL committed
Commit b5a57e1 · 1 Parent(s): b34f2b8

filter feature set

Files changed (3):
  1. app.py +29 -13
  2. src/display/utils.py +1 -0
  3. src/leaderboard/read_evals.py +5 -2
app.py CHANGED
@@ -70,6 +70,7 @@ def update_table(
     columns: list,
     phenotypes: list,
     metrics: list,
+    feature_sets: list,
     nb_shots: list,
     type_query: list,
     precision_query: str,
@@ -77,7 +78,7 @@ def update_table(
     show_deleted: bool,
     query: str,
 ):
-    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted, nb_shots)
+    filtered_df = filter_models(hidden_df, type_query, size_query, precision_query, show_deleted, feature_sets, nb_shots)
     filtered_df = filter_queries(query, filtered_df)
     df = select_columns(filtered_df, columns, phenotypes, metrics)
     return df
@@ -91,6 +92,7 @@ def select_columns(df: pd.DataFrame, columns: list, phenotypes: list, metrics:list
     always_here_cols = [
         AutoEvalColumn.model_type_symbol.name,
         AutoEvalColumn.model.name,
+        AutoEvalColumn.feature_set.name,
         AutoEvalColumn.nb_shots.name,
     ]
 
@@ -125,7 +127,7 @@ def filter_queries(query: str, filtered_df: pd.DataFrame) -> pd.DataFrame:
 
 
 def filter_models(
-    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool, nb_shots: list) -> pd.DataFrame:
+    df: pd.DataFrame, type_query: list, size_query: list, precision_query: list, show_deleted: bool, feature_sets: list, nb_shots: list) -> pd.DataFrame:
     # Show all models
     if show_deleted:
         filtered_df = df
@@ -137,6 +139,7 @@ def filter_models(
     filtered_df = filtered_df.loc[df[AutoEvalColumn.precision.name].isin(precision_query + ["None"])]
     if -1 not in nb_shots:
         filtered_df = filtered_df.loc[df[AutoEvalColumn.nb_shots.name].isin(nb_shots)]
+    filtered_df = filtered_df.loc[df[AutoEvalColumn.feature_set.name].isin(feature_sets)]
 
     numeric_interval = pd.IntervalIndex(sorted([NUMERIC_INTERVALS[s] for s in size_query]))
     params_column = pd.to_numeric(df[AutoEvalColumn.params.name], errors="coerce")
@@ -155,6 +158,12 @@ with demo:
     with gr.TabItem("🏅 LLM Benchmark", elem_id="llm-benchmark-tab-table", id=0):
         with gr.Row():
             with gr.Column():
+                with gr.Row():
+                    search_bar = gr.Textbox(
+                        placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
+                        show_label=False,
+                        elem_id="search-bar",
+                    )
                 with gr.Row():
                     with gr.Column(min_width=320):
                         shown_phenotypes = gr.CheckboxGroup(
@@ -173,6 +182,11 @@ with demo:
                                 for c in fields(AutoEvalColumn)
                                 if not c.hidden and not c.never_hidden and c.is_task
                             ])),
+                            value=sorted(set([
+                                c.task.value.metric_name
+                                for c in fields(AutoEvalColumn)
+                                if not c.hidden and not c.never_hidden and c.is_task
+                            ])),
                             label="Select metrics to show",
                             elem_id="metric-select",
                             interactive=True,
@@ -193,18 +207,23 @@ with demo:
                             elem_id="column-select",
                             interactive=True,
                         )
-                with gr.Column(min_width=320):
                     with gr.Row():
-                        search_bar = gr.Textbox(
-                            placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press ENTER...",
-                            show_label=False,
-                            elem_id="search-bar",
+                        deleted_models_visibility = gr.Checkbox(
+                            value=True, label="Show gated/private/deleted models", interactive=True
                         )
+                with gr.Column(min_width=320):
                     with gr.Column(min_width=320):
+                        filter_features = gr.CheckboxGroup(
+                            label="Features Set",
+                            choices=[("Baseline (age, sex, BMI)", "baseline"), ("Expanded (age, sex, BMI, HDL, LDL, total-cholesterol, triglycerides, diastolic-blood-pressure, smoking-status, snoring, insomnia, daytime-napping, sleep-duration, chronotype)", "expanded")],
+                            value=["baseline"],
+                            interactive=True,
+                            elem_id="filter-feature-set",
+                        )
                         filter_nb_shots = gr.CheckboxGroup(
                             label="Number of shots",
                             choices=[("Zero-shot", 0), ("10-shot", 10), ("All", -1)],
-                            value=[-1],
+                            value=[0],
                             interactive=True,
                             elem_id="filter-nb-shots",
                         )
@@ -229,10 +248,6 @@ with demo:
                             interactive=True,
                             elem_id="filter-columns-size",
                         )
-                with gr.Row():
-                    deleted_models_visibility = gr.Checkbox(
-                        value=True, label="Show gated/private/deleted models", interactive=True
-                    )
 
         leaderboard_table = gr.components.Dataframe(
             value=leaderboard_df[
@@ -274,7 +289,7 @@ with demo:
             ],
             leaderboard_table,
         )
-    for selector in [shown_phenotypes, shown_metrics, shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, filter_nb_shots]:
+    for selector in [shown_phenotypes, shown_metrics, shown_columns, filter_columns_type, filter_columns_precision, filter_columns_size, deleted_models_visibility, filter_nb_shots, filter_features]:
         selector.change(
             update_table,
             [
@@ -282,6 +297,7 @@ with demo:
                 shown_columns,
                 shown_phenotypes,
                 shown_metrics,
+                filter_features,
                 filter_nb_shots,
                 filter_columns_type,
                 filter_columns_precision,
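In practice, the new checkbox group feeds a list such as ["baseline"] or ["baseline", "expanded"] into filter_models, which intersects it with the feature-set column via .isin. A minimal, self-contained sketch of that one filtering step (the literal column label "Feature Set" stands in for AutoEvalColumn.feature_set.name, and the toy rows are invented for illustration):

import pandas as pd

# Toy leaderboard rows; "Feature Set" stands in for AutoEvalColumn.feature_set.name.
df = pd.DataFrame({
    "Model": ["model-a", "model-b", "model-c"],
    "Feature Set": ["baseline", "expanded", "baseline"],
})

def filter_feature_sets(df: pd.DataFrame, feature_sets: list) -> pd.DataFrame:
    # Same idiom as the line added to filter_models: keep only rows whose
    # feature set is among the checked values.
    return df.loc[df["Feature Set"].isin(feature_sets)]

print(filter_feature_sets(df, ["baseline"]))  # keeps model-a and model-c

Because the checkbox defaults to value=["baseline"], the table initially hides expanded-feature runs until Expanded is also ticked; unlike the nb_shots filter there is no "All" escape value, so unchecking every feature set empties the table.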
src/display/utils.py CHANGED
@@ -30,6 +30,7 @@ auto_eval_column_dict = []
 # Init
 auto_eval_column_dict.append(["model_type_symbol", ColumnContent, ColumnContent("T", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["model", ColumnContent, ColumnContent("Model", "markdown", True, never_hidden=True)])
+auto_eval_column_dict.append(["feature_set", ColumnContent, ColumnContent("Feature Set", "str", True, never_hidden=True)])
 auto_eval_column_dict.append(["nb_shots", ColumnContent, ColumnContent("#Shots", "number", True, never_hidden=True)])
 #Scores
 auto_eval_column_dict.append(["average_auroc", ColumnContent, ColumnContent("Average AUROC ⬆️", "number", True)])
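For context: in the stock Hugging Face leaderboard template, auto_eval_column_dict is folded into a dynamically built frozen dataclass, which is what makes the new entry addressable as AutoEvalColumn.feature_set.name in app.py and read_evals.py. A minimal sketch of that pattern, assuming this Space follows the template (ColumnContent's fields below are the template's, not verified against this repo; frozen=True is added here so the instance default stays hashable on newer Pythons):

from dataclasses import dataclass, make_dataclass

@dataclass(frozen=True)
class ColumnContent:
    name: str                   # column header shown in the table
    type: str                   # Gradio column type ("str", "number", "markdown", ...)
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = [
    ["feature_set", ColumnContent, ColumnContent("Feature Set", "str", True, never_hidden=True)],
]

# Each [attr_name, type, default] triple becomes a dataclass field, so the
# class attribute AutoEvalColumn.feature_set holds the ColumnContent above.
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.feature_set.name)  # -> Feature Set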
src/leaderboard/read_evals.py CHANGED
@@ -22,7 +22,8 @@ class EvalResult:
     revision: str # commit hash, "" if main
     results: dict
     raw_data: dict
-    nb_shots: int = 0
+    nb_shots: int
+    feature_set: str
     precision: Precision = Precision.Unknown
     model_type: ModelType = ModelType.Unknown # Pretrained, fine tuned, ...
     weight_type: WeightType = WeightType.Original # Original or Adapter
@@ -46,8 +47,8 @@ class EvalResult:
         model = full_model.split("/")[1]
         precision = Precision.from_str(config.get("precision"))
         revision = config.get("revision", "")
-        feature_set = config.get("feature_set", "Unknown")
         nb_shots = config.get("nb_shots", None)
+        feature_set = config.get("feature_set", None)
         model_type = ModelType.from_str(config.get("model_type", ""))
         weight_type = WeightType[config.get("weight_type", "Original")]
         license = config.get("license", "?")
@@ -83,6 +84,7 @@ class EvalResult:
             results=results,
             raw_data=data,
             nb_shots=nb_shots,
+            feature_set=feature_set,
             precision=precision,
             revision=revision,
             still_on_hub=still_on_hub,
@@ -101,6 +103,7 @@ class EvalResult:
         average_auprc = np.mean(np.array([d["metrics"]["mean_auprc"] for d in self.raw_data["results"].values() if "mean_auprc" in d["metrics"].keys()]))
         data_dict = {
             "eval_name": self.eval_name, # not a column, just a save name,
+            AutoEvalColumn.feature_set.name: self.feature_set,
             AutoEvalColumn.nb_shots.name: self.nb_shots,
             AutoEvalColumn.precision.name: self.precision.value.name,
             AutoEvalColumn.model_type.name: self.model_type.value.name,
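On the reading side, feature_set now travels from each result file's config block into EvalResult and on into the DataFrame row, mirroring nb_shots. A small sketch of that parsing step, assuming a results JSON with a "config" block as in the leaderboard template (the surrounding file layout and key names other than "feature_set"/"nb_shots" are illustrative):

import json

raw = """
{
  "config": {
    "model_name": "org/model",
    "precision": "bfloat16",
    "nb_shots": 0,
    "feature_set": "baseline"
  }
}
"""

data = json.loads(raw)
config = data.get("config", {})

# Same idiom as the diff: absent keys fall back to None rather than raising.
nb_shots = config.get("nb_shots", None)
feature_set = config.get("feature_set", None)
print(nb_shots, feature_set)  # -> 0 baseline

Note the fallback for a missing feature_set changed from "Unknown" to None, so rows loaded from older result files carry None and will be dropped by the new .isin filter in app.py unless their configs are regenerated.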