natolambert committed on
Commit
0de05c0
•
1 Parent(s): 6ce351e
Files changed (1) hide show
  1. app.py +18 -7
app.py CHANGED
@@ -202,10 +202,14 @@ def regex_table(dataframe, regex, filter_button):
202
  # Join the list into a single regex pattern with '|' acting as OR
203
  combined_regex = '|'.join(regex_list)
204
 
 
 
 
205
  # if filter_button, remove all rows with "ai2" in the model name
 
206
  if isinstance(filter_button, list) or isinstance(filter_button, str):
207
- if "AI2 Experiments" not in filter_button and ("ai2" not in regex):
208
- dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
209
  if "Seq. Classifiers" not in filter_button:
210
  dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
211
  if "DPO" not in filter_button:
@@ -220,6 +224,13 @@ def regex_table(dataframe, regex, filter_button):
220
  # replace column '' with count/rank
221
  data[''] = np.arange(1, 1 + len(data))
222
 
 
 
 
 
 
 
 
223
  # if Score exists, round to 2 decimals
224
  if "Score" in data.columns:
225
  data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
@@ -255,8 +266,8 @@ with gr.Blocks(css=custom_css) as app:
255
  search_1 = gr.Textbox(label="Model Search (delimit with , )",
256
  placeholder="Model Search (delimit with , )",
257
  show_label=False)
258
- model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "AI2 Experiments"],
259
- value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
260
  label="Model Types",
261
  show_label=False,
262
  # info="Which model types to include.",
@@ -270,7 +281,7 @@ with gr.Blocks(css=custom_css) as app:
270
  visible=False,
271
  )
272
  rewardbench_table = gr.Dataframe(
273
- regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers"]).values,
274
  datatype=col_types_rewardbench_avg,
275
  headers=rewardbench_data_avg.columns.tolist(),
276
  elem_id="rewardbench_dataframe_avg",
@@ -280,7 +291,7 @@ with gr.Blocks(css=custom_css) as app:
280
  with gr.TabItem("🔍 RewardBench - Detailed"):
281
  with gr.Row():
282
  search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
283
- model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "AI2 Experiments"],
284
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
285
  label="Model Types",
286
  show_label=False,
@@ -320,7 +331,7 @@ with gr.Blocks(css=custom_css) as app:
320
  with gr.TabItem("Prior Test Sets"):
321
  with gr.Row():
322
  search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
323
- model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "AI2 Experiments"],
324
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
325
  label="Model Types",
326
  show_label=False,
 
202
  # Join the list into a single regex pattern with '|' acting as OR
203
  combined_regex = '|'.join(regex_list)
204
 
205
+ # remove internal ai2 data
206
+ dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
207
+
208
  # if filter_button, remove all rows with "ai2" in the model name
209
+ update_scores = False
210
  if isinstance(filter_button, list) or isinstance(filter_button, str):
211
+ if "Prior Sets" not in filter_button and 'Prior Sets (0.5 weight)' in dataframe.columns:
212
+ update_scores = True
213
  if "Seq. Classifiers" not in filter_button:
214
  dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
215
  if "DPO" not in filter_button:
 
224
  # replace column '' with count/rank
225
  data[''] = np.arange(1, 1 + len(data))
226
 
227
+ # if update the score to not use prior sets, do so
228
+ if update_scores:
229
+ data["Score"] = (data["Chat"] + data["Chat Hard"] + data["Safety"] + data["Reasoning"]) / 4
230
+ data["Prior Sets (0.5 weight)"] = np.NaN
231
+ # sort array by Score column
232
+ data = data.sort_values(by='Score', ascending=False)
233
+
234
  # if Score exists, round to 2 decimals
235
  if "Score" in data.columns:
236
  data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
 
266
  search_1 = gr.Textbox(label="Model Search (delimit with , )",
267
  placeholder="Model Search (delimit with , )",
268
  show_label=False)
269
+ model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "Prior Sets"],
270
+ value=["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"],
271
  label="Model Types",
272
  show_label=False,
273
  # info="Which model types to include.",
 
281
  visible=False,
282
  )
283
  rewardbench_table = gr.Dataframe(
284
+ regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"]).values,
285
  datatype=col_types_rewardbench_avg,
286
  headers=rewardbench_data_avg.columns.tolist(),
287
  elem_id="rewardbench_dataframe_avg",
 
291
  with gr.TabItem("🔍 RewardBench - Detailed"):
292
  with gr.Row():
293
  search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
294
+ model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
295
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
296
  label="Model Types",
297
  show_label=False,
 
331
  with gr.TabItem("Prior Test Sets"):
332
  with gr.Row():
333
  search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
334
+ model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
335
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
336
  label="Model Types",
337
  show_label=False,