Spaces:

allenai
/

reward-bench

Running

App Files Files Community

natolambert committed on Mar 7, 2024

Commit

f89f357

1 Parent(s): bd17252

nits

Browse files

Files changed (1) hide show

app.py +33 -14

app.py CHANGED Viewed

@@ -138,10 +138,24 @@ prefs_data = load_all_data(repo_dir_rewardbench, subdir="pref-sets").sort_values
 rewardbench_data_avg = avg_over_rewardbench(rewardbench_data, prefs_data).sort_values(by='average', ascending=False)
-col_types_rewardbench = ["markdown"] + ["str"] + ["number"] * (len(rewardbench_data.columns) - 1)
-col_types_rewardbench_avg = ["markdown"]+ ["str"] + ["number"] * (len(rewardbench_data_avg.columns) - 1)
 cols_rewardbench_data_length = ["markdown"] + ["number"] * (len(rewardbench_data_length.columns) - 1)
-col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
 # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
 # for showing random samples
@@ -175,36 +189,39 @@ def regex_table(dataframe, regex, filter_button):
     # if filter_button, remove all rows with "ai2" in the model name
     if isinstance(filter_button, list) or isinstance(filter_button, str):
         if "AI2 Experiments" not in filter_button and ("ai2" not in regex):
-            dataframe = dataframe[~dataframe["model"].str.contains("ai2", case=False, na=False)]
         if "Seq. Classifiers" not in filter_button:
-            dataframe = dataframe[~dataframe["model_type"].str.contains("Seq. Classifier", case=False, na=False)]
         if "DPO" not in filter_button:
-            dataframe = dataframe[~dataframe["model_type"].str.contains("DPO", case=False, na=False)]
         if "Custom Classifiers" not in filter_button:
-            dataframe = dataframe[~dataframe["model_type"].str.contains("Custom Classifier", case=False, na=False)]
     # Filter the dataframe such that 'model' contains any of the regex patterns
-    return dataframe[dataframe["model"].str.contains(combined_regex, case=False, na=False)]
 with gr.Blocks(css=custom_css) as app:
     # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
     with gr.Row():
-        with gr.Column(scale=1.65):
             # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
             # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
             # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
             gr.Markdown("""
                         ![](file/src/logo.png)
                         """)
-        with gr.Column(scale=3):
             gr.Markdown(TOP_TEXT)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 RewardBench Leaderboard"):
             with gr.Row():
-                search_1 = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
                 model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                 #  info="Which model types to include.",
                                                  )
             with gr.Row():
@@ -225,10 +242,11 @@ with gr.Blocks(css=custom_css) as app:
         with gr.TabItem("🔍 RewardBench - Detailed"):
             with gr.Row():
-                search_2 = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
                 model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
                                                 #  info="Which model types to include."
                                                  )
             with gr.Row():
@@ -264,10 +282,11 @@ with gr.Blocks(css=custom_css) as app:
         #         )
         with gr.TabItem("Existing Test Sets"):
             with gr.Row():
-                search_3 = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
                 model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
-                                                 label="Model Types",
                                                 #  info="Which model types to include.",
                                                  )
             with gr.Row():

 rewardbench_data_avg = avg_over_rewardbench(rewardbench_data, prefs_data).sort_values(by='average', ascending=False)
+def prep_df(df):
+    # add column to 0th entry with count (column name itself empty)
+    df.insert(0, '', range(1, 1 + len(df)))
+    # replace "model" with "Model" and "model_type" with "Model Type" and "average" with "Average"
+    df = df.rename(columns={"model": "Model", "model_type": "Model Type", "average": "Average"})
+    return df
+# add count column to all dataframes
+rewardbench_data = prep_df(rewardbench_data)
+rewardbench_data_avg = prep_df(rewardbench_data_avg)
+rewardbench_data_length = prep_df(rewardbench_data_length)
+prefs_data = prep_df(prefs_data)
+col_types_rewardbench = ["number"] + ["markdown"] + ["str"] + ["number"] * (len(rewardbench_data.columns) - 1)
+col_types_rewardbench_avg = ["number"] + ["markdown"]+ ["str"] + ["number"] * (len(rewardbench_data_avg.columns) - 1)
 cols_rewardbench_data_length = ["markdown"] + ["number"] * (len(rewardbench_data_length.columns) - 1)
+col_types_prefs = ["number"] + ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
 # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
 # for showing random samples
     # if filter_button, remove all rows with "ai2" in the model name
     if isinstance(filter_button, list) or isinstance(filter_button, str):
         if "AI2 Experiments" not in filter_button and ("ai2" not in regex):
+            dataframe = dataframe[~dataframe["Model"].str.contains("ai2", case=False, na=False)]
         if "Seq. Classifiers" not in filter_button:
+            dataframe = dataframe[~dataframe["Model Type"].str.contains("Seq. Classifier", case=False, na=False)]
         if "DPO" not in filter_button:
+            dataframe = dataframe[~dataframe["Model Type"].str.contains("DPO", case=False, na=False)]
         if "Custom Classifiers" not in filter_button:
+            dataframe = dataframe[~dataframe["Model Type"].str.contains("Custom Classifier", case=False, na=False)]
     # Filter the dataframe such that 'model' contains any of the regex patterns
+    return dataframe[dataframe["Model"].str.contains(combined_regex, case=False, na=False)]
 with gr.Blocks(css=custom_css) as app:
     # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
     with gr.Row():
+        with gr.Column(scale=3):
             # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
             # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
             # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
             gr.Markdown("""
                         ![](file/src/logo.png)
                         """)
+        with gr.Column(scale=6):
             gr.Markdown(TOP_TEXT)
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("🏆 RewardBench Leaderboard"):
             with gr.Row():
+                search_1 = gr.Textbox(label="Model Search (delimit with , )",
+                                      placeholder="Model Search (delimit with , )",
+                                      show_label=False)
                 model_types_1 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
+                                                 show_label=False,
                                                 #  info="Which model types to include.",
                                                  )
             with gr.Row():
         with gr.TabItem("🔍 RewardBench - Detailed"):
             with gr.Row():
+                search_2 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
                 model_types_2 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
                                                  label="Model Types",
+                                                 show_label=False,
                                                 #  info="Which model types to include."
                                                  )
             with gr.Row():
         #         )
         with gr.TabItem("Existing Test Sets"):
             with gr.Row():
+                search_3 = gr.Textbox(label="Model Search (delimit with , )", show_label=False, placeholder="Model Search (delimit with , )")
                 model_types_3 = gr.CheckboxGroup(["Seq. Classifiers", "DPO", "Custom Classifiers", "AI2 Experiments"],
                                                  value=["Seq. Classifiers", "DPO", "Custom Classifiers"],
+                                                 label="Model Types",
+                                                 show_label=False,
                                                 #  info="Which model types to include.",
                                                  )
             with gr.Row():