natolambert committed on
Commit
f89f357
•
1 Parent(s): bd17252
Files changed (1) hide show
  1. app.py +33 -14
app.py CHANGED
@@ -138,10 +138,24 @@ prefs_data = load_all_data(repo_dir_rewardbench, subdir="pref-sets").sort_values
138
 
139
  rewardbench_data_avg = avg_over_rewardbench(rewardbench_data, prefs_data).sort_values(by='average', ascending=False)
140
 
141
- col_types_rewardbench = ["markdown"] + ["str"] + ["number"] * (len(rewardbench_data.columns) - 1)
142
- col_types_rewardbench_avg = ["markdown"]+ ["str"] + ["number"] * (len(rewardbench_data_avg.columns) - 1)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
143
  cols_rewardbench_data_length = ["markdown"] + ["number"] * (len(rewardbench_data_length.columns) - 1)
144
- col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
145
  # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
146
 
147
  # for showing random samples
@@ -175,36 +189,39 @@ def regex_table(dataframe, regex, filter_button):
175
  # if filter_button, remove all rows with "ai2" in the model name
176
  if isinstance(filter_button, list) or isinstance(filter_button, str):
177
  if "AI2 Experiments" not in filter_button and ("ai2" not in regex):
178
- dataframe = dataframe[~dataframe["model"].str.contains("ai2", case=False, na=False)]
179
  if "Seq. Classifiers" not in filter_button:
180
- dataframe = dataframe[~dataframe["model_type"].str.contains("Seq. Classifier", case=False, na=False)]
181
  if "DPO" not in filter_button:
182
- dataframe = dataframe[~dataframe["model_type"].str.contains("DPO", case=False, na=False)]
183
  if "Custom Classifiers" not in filter_button:
184
- dataframe = dataframe[~dataframe["model_type"].str.contains("Custom Classifier", case=False, na=False)]
185
  # Filter the dataframe such that 'model' contains any of the regex patterns
186
- return dataframe[dataframe["model"].str.contains(combined_regex, case=False, na=False)]
187
 
188
 
189
  with gr.Blocks(css=custom_css) as app:
190
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
191
  with gr.Row():
192
- with gr.Column(scale=1.65):
193
  # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
194
  # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
195
  # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
196
  gr.Markdown("""
197
  ![](file/src/logo.png)
198
  """)
199
- with gr.Column(scale=3):
200
  gr.Markdown(TOP_TEXT)
201
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
202
  with gr.TabItem("🏆 RewardBench Leaderboard"):
203
  with gr.Row():
204
- search_1 = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
 
 
205
  model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
206
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
207
  label="Model Types",
 
208
  # info="Which model types to include.",
209
  )
210
  with gr.Row():
@@ -225,10 +242,11 @@ with gr.Blocks(css=custom_css) as app:
225
 
226
  with gr.TabItem("🔍 RewardBench - Detailed"):
227
  with gr.Row():
228
- search_2 = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
229
  model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
230
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
231
  label="Model Types",
 
232
  # info="Which model types to include."
233
  )
234
  with gr.Row():
@@ -264,10 +282,11 @@ with gr.Blocks(css=custom_css) as app:
264
  # )
265
  with gr.TabItem("Existing Test Sets"):
266
  with gr.Row():
267
- search_3 = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
268
  model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
269
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
270
- label="Model Types",
 
271
  # info="Which model types to include.",
272
  )
273
  with gr.Row():
 
138
 
139
  rewardbench_data_avg = avg_over_rewardbench(rewardbench_data, prefs_data).sort_values(by='average', ascending=False)
140
 
141
def prep_df(df):
    """Return a display-ready copy of *df* with a count column and renamed headers.

    The columns "model", "model_type" and "average" (when present) are renamed
    to "Model", "Model Type" and "Average". A 1-based running count is inserted
    as the first column under an empty column name.

    The input dataframe is not modified: the rename produces a new frame and
    the count column is inserted into that new frame only, so calling this
    function repeatedly on the same source frame is safe.

    Args:
        df: pandas DataFrame to prepare.

    Returns:
        A new DataFrame with the count column at position 0 and the renamed
        headers.

    Raises:
        ValueError: if *df* already has a column named '' (raised by
            ``DataFrame.insert``, which rejects duplicate column names).
    """
    # replace "model" with "Model" and "model_type" with "Model Type" and "average" with "Average"
    # rename() returns a new frame, which keeps the caller's dataframe untouched
    prepared = df.rename(columns={"model": "Model", "model_type": "Model Type", "average": "Average"})

    # add column to 0th entry with count (column name itself empty)
    prepared.insert(0, '', range(1, 1 + len(prepared)))
    return prepared
148
+
149
# add count column to all dataframes
rewardbench_data = prep_df(rewardbench_data)
rewardbench_data_avg = prep_df(rewardbench_data_avg)
rewardbench_data_length = prep_df(rewardbench_data_length)
prefs_data = prep_df(prefs_data)

# Per-column datatype lists for the tables. Every frame above now starts with
# the count column inserted by prep_df, so every list leads with "number"
# before its original entries (including the length table, which previously
# still started with "markdown" and was therefore shifted by one column).
# NOTE(review): the trailing "number" run is sized from the full column count,
# so each list is longer than the frame has columns; presumably the surplus
# entries are ignored by the table component -- confirm.
col_types_rewardbench = ["number"] + ["markdown"] + ["str"] + ["number"] * (len(rewardbench_data.columns) - 1)
col_types_rewardbench_avg = ["number"] + ["markdown"] + ["str"] + ["number"] * (len(rewardbench_data_avg.columns) - 1)
cols_rewardbench_data_length = ["number"] + ["markdown"] + ["number"] * (len(rewardbench_data_length.columns) - 1)
col_types_prefs = ["number"] + ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
159
  # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
160
 
161
  # for showing random samples
 
189
  # if filter_button, remove all rows with "ai2" in the model name
190
  if isinstance(filter_button, list) or isinstance(filter_button, str):
191
  if "AI2 Experiments" not in filter_button and ("ai2" not in regex):
192
+ dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
193
  if "Seq. Classifiers" not in filter_button:
194
+ dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
195
  if "DPO" not in filter_button:
196
+ dataframe = dataframe[~dataframe["Model Type"].str.contains("DPO", case=False, na=False)]
197
  if "Custom Classifiers" not in filter_button:
198
+ dataframe = dataframe[~dataframe["Model Type"].str.contains("Custom Classifier", case=False, na=False)]
199
  # Filter the dataframe such that 'Model' contains any of the regex patterns
200
+ return dataframe[dataframe["Model"].str.contains(combined_regex, case=False, na=False)]
201
 
202
 
203
  with gr.Blocks(css=custom_css) as app:
204
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
205
  with gr.Row():
206
+ with gr.Column(scale=3):
207
  # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
208
  # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
209
  # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
210
  gr.Markdown("""
211
  ![](file/src/logo.png)
212
  """)
213
+ with gr.Column(scale=6):
214
  gr.Markdown(TOP_TEXT)
215
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
216
  with gr.TabItem("🏆 RewardBench Leaderboard"):
217
  with gr.Row():
218
+ search_1 = gr.Textbox(label="Model Search (delimit with , )",
219
+ placeholder="Model Search (delimit with , )",
220
+ show_label=False)
221
  model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
222
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
223
  label="Model Types",
224
+ show_label=False,
225
  # info="Which model types to include.",
226
  )
227
  with gr.Row():
 
242
 
243
  with gr.TabItem("🔍 RewardBench - Detailed"):
244
  with gr.Row():
245
+ search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
246
  model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
247
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
248
  label="Model Types",
249
+ show_label=False,
250
  # info="Which model types to include."
251
  )
252
  with gr.Row():
 
282
  # )
283
  with gr.TabItem("Existing Test Sets"):
284
  with gr.Row():
285
+ search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
286
  model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
287
  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
288
+ label="Model Types",
289
+ show_label=False,
290
  # info="Which model types to include.",
291
  )
292
  with gr.Row():