Spaces:

allenai
/

reward-bench

Running

App Files Files Community

natolambert committed on May 22

Commit

0de05c0

•

1 Parent(s): 6ce351e

up

Browse files

Files changed (1) hide show

app.py +18 -7

app.py CHANGED Viewed

@@ -202,10 +202,14 @@ def regex_table(dataframe, regex, filter_button):
     # Join the list into a single regex pattern with '|' acting as OR
     combined_regex = '|'.join(regex_list)
     # if filter_button, remove all rows with "ai2" in the model name
     if isinstance(filter_button, list) or isinstance(filter_button, str):
-        if "AI2 Experiments" not in filter_button and ("ai2" not in regex):
-            dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
         if "Seq. Classifiers" not in filter_button:
             dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
         if "DPO" not in filter_button:
@@ -220,6 +224,13 @@ def regex_table(dataframe, regex, filter_button):
     # replace column '' with count/rank
     data[''] = np.arange(1, 1 + len(data))
     # if Score exists, round to 2 decimals
     if "Score" in data.columns:
         data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
@@ -255,8 +266,8 @@ with gr.Blocks(css=custom_css) as app:
                 search_1 = gr.Textbox(label="Model Search (delimit with , )",
                                       placeholder="Model Search (delimit with , )",
                                       show_label=False)
-                model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "AI2 Experiments"],
-                                                 value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                  show_label=False,
                                                 #  info="Which model types to include.",
@@ -270,7 +281,7 @@ with gr.Blocks(css=custom_css) as app:
                     visible=False,
                 )
                 rewardbench_table = gr.Dataframe(
-                    regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers"]).values,
                     datatype=col_types_rewardbench_avg,
                     headers=rewardbench_data_avg.columns.tolist(),
                     elem_id="rewardbench_dataframe_avg",
@@ -280,7 +291,7 @@ with gr.Blocks(css=custom_css) as app:
         with gr.TabItem("🔍 RewardBench - Detailed"):
             with gr.Row():
                 search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
-                model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                  show_label=False,
@@ -320,7 +331,7 @@ with gr.Blocks(css=custom_css) as app:
         with gr.TabItem("Prior Test Sets"):
             with gr.Row():
                 search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
-                model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                  show_label=False,

     # Join the list into a single regex pattern with '|' acting as OR
     combined_regex = '|'.join(regex_list)
+    # remove internal ai2 data
+    dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
     # if filter_button is given, drop the model types that are not selected
+    update_scores = False
     if isinstance(filter_button, list) or isinstance(filter_button, str):
+        if "Prior Sets" not in filter_button and 'Prior Sets (0.5 weight)' in dataframe.columns:
+            update_scores = True
         if "Seq. Classifiers" not in filter_button:
             dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
         if "DPO" not in filter_button:
     # replace column '' with count/rank
     data[''] = np.arange(1, 1 + len(data))
+    # if "Prior Sets" is deselected, recompute Score as the mean of the four core sections
+    if update_scores:
+        data["Score"] = (data["Chat"] + data["Chat Hard"] + data["Safety"] + data["Reasoning"]) / 4
+        data["Prior Sets (0.5 weight)"] = np.NaN
+        # sort array by Score column
+        data = data.sort_values(by='Score', ascending=False)
     # if Score exists, round to 2 decimals
     if "Score" in data.columns:
         data["Score"] = np.round(np.array(data["Score"].values).astype(float), 2)
                 search_1 = gr.Textbox(label="Model Search (delimit with , )",
                                       placeholder="Model Search (delimit with , )",
                                       show_label=False)
+                model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative", "Prior Sets"],
+                                                 value=["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"],
                                                  label="Model Types",
                                                  show_label=False,
                                                 #  info="Which model types to include.",
                     visible=False,
                 )
                 rewardbench_table = gr.Dataframe(
+                    regex_table(rewardbench_data_avg.copy(), "", ["Seq. Classifiers", "DPO", "Custom Classifiers", "Prior Sets"]).values,
                     datatype=col_types_rewardbench_avg,
                     headers=rewardbench_data_avg.columns.tolist(),
                     elem_id="rewardbench_dataframe_avg",
         with gr.TabItem("🔍 RewardBench - Detailed"):
             with gr.Row():
                 search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
+                model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                  show_label=False,
         with gr.TabItem("Prior Test Sets"):
             with gr.Row():
                 search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
+                model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "Generative"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                  show_label=False,