Spaces:
Running
Running
natolambert
committed on
Commit
•
521165c
1
Parent(s):
8ac8bdc
rounding
Browse files
- app.py +6 -6
- src/utils.py +2 -2
app.py
CHANGED
@@ -52,7 +52,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
52 |
subset_cols = [col for col in new_df.columns if col in sub_subsets]
|
53 |
sub_data = new_df[subset_cols].values # take the relevant column values
|
54 |
sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
|
55 |
-
new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
|
56 |
# new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
57 |
|
58 |
data_cols = list(subset_mapping.keys())
|
@@ -65,7 +65,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
65 |
pref_data = dataframe_prefs[pref_columns].values
|
66 |
|
67 |
# add column test sets knowing the rows are not identical, take superset
|
68 |
-
dataframe_prefs["Prior Sets"] = np.round(np.nanmean(pref_data, axis=1), 2)
|
69 |
|
70 |
# add column Test Sets empty to new_df
|
71 |
new_df["Prior Sets"] = np.nan
|
@@ -83,7 +83,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
83 |
|
84 |
# add total average
|
85 |
data_cols += ["Prior Sets"]
|
86 |
-
new_df["average"] = np.round(np.nanmean(new_df[data_cols].values, axis=1), 2)
|
87 |
|
88 |
# make average third column
|
89 |
keep_columns = ["model", "model_type", "average"] + data_cols
|
@@ -207,15 +207,15 @@ def regex_table(dataframe, regex, filter_button):
|
|
207 |
with gr.Blocks(css=custom_css) as app:
|
208 |
# create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
|
209 |
with gr.Row():
|
210 |
-
with gr.Column(scale=4):
|
|
|
|
|
211 |
# search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
|
212 |
# filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
|
213 |
# img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
214 |
gr.Markdown("""
|
215 |
![](file/src/logo.png)
|
216 |
""")
|
217 |
-
with gr.Column(scale=6):
|
218 |
-
gr.Markdown(TOP_TEXT)
|
219 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
220 |
with gr.TabItem("🏆 RewardBench Leaderboard"):
|
221 |
with gr.Row():
|
|
|
52 |
subset_cols = [col for col in new_df.columns if col in sub_subsets]
|
53 |
sub_data = new_df[subset_cols].values # take the relevant column values
|
54 |
sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
|
55 |
+
new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 1) # take the weighted average
|
56 |
# new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
57 |
|
58 |
data_cols = list(subset_mapping.keys())
|
|
|
65 |
pref_data = dataframe_prefs[pref_columns].values
|
66 |
|
67 |
# add column test sets knowing the rows are not identical, take superset
|
68 |
+
dataframe_prefs["Prior Sets"] = np.round(np.nanmean(pref_data, axis=1), 1)
|
69 |
|
70 |
# add column Test Sets empty to new_df
|
71 |
new_df["Prior Sets"] = np.nan
|
|
|
83 |
|
84 |
# add total average
|
85 |
data_cols += ["Prior Sets"]
|
86 |
+
new_df["average"] = np.round(np.nanmean(new_df[data_cols].values, axis=1), 1)
|
87 |
|
88 |
# make average third column
|
89 |
keep_columns = ["model", "model_type", "average"] + data_cols
|
|
|
207 |
with gr.Blocks(css=custom_css) as app:
|
208 |
# create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
|
209 |
with gr.Row():
|
210 |
+
with gr.Column(scale=6):
|
211 |
+
gr.Markdown(TOP_TEXT)
|
212 |
+
with gr.Column(scale=4):
|
213 |
# search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
|
214 |
# filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
|
215 |
# img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
216 |
gr.Markdown("""
|
217 |
![](file/src/logo.png)
|
218 |
""")
|
|
|
|
|
219 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
220 |
with gr.TabItem("🏆 RewardBench Leaderboard"):
|
221 |
with gr.Row():
|
src/utils.py
CHANGED
@@ -97,8 +97,8 @@ def load_all_data(data_repo, subdir:str, subsubsets=False): # use HF api to p
|
|
97 |
cols.remove("pku_safer")
|
98 |
|
99 |
# round
|
100 |
-
df[cols] = df[cols].round(
|
101 |
-
avg = np.nanmean(df[cols].values,axis=1).round(
|
102 |
# add average column
|
103 |
df["average"] = avg
|
104 |
|
|
|
97 |
cols.remove("pku_safer")
|
98 |
|
99 |
# round
|
100 |
+
df[cols] = (df[cols]*100).round(1)
|
101 |
+
avg = np.nanmean(df[cols].values,axis=1).round(1)
|
102 |
# add average column
|
103 |
df["average"] = avg
|
104 |
|