natolambert committed on
Commit
56fcfaf
1 Parent(s): 90eea3b

length experiment

Browse files
Files changed (2) hide show
  1. app.py +72 -0
  2. src/utils.py +5 -0
app.py CHANGED
@@ -63,14 +63,78 @@ def avg_over_herm(dataframe):
63
  def expand_subsets(dataframe):
64
  # TODO need to modify data/ script to do this
65
  pass
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
66
 
67
  herm_data = load_all_data(repo_dir_herm).sort_values(by='average', ascending=False)
68
  herm_data_avg = avg_over_herm(herm_data).sort_values(by='average', ascending=False)
 
69
  prefs_data = load_all_data(repo_dir_prefs).sort_values(by='average', ascending=False)
70
  # prefs_data_sub = expand_subsets(prefs_data).sort_values(by='average', ascending=False)
71
 
72
  col_types_herm = ["markdown"] + ["number"] * (len(herm_data.columns) - 1)
73
  col_types_herm_avg = ["markdown"] + ["number"] * (len(herm_data_avg.columns) - 1)
 
74
  col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
75
  # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
76
 
@@ -114,6 +178,14 @@ with gr.Blocks() as app:
114
  headers=herm_data.columns.tolist(),
115
  elem_id="herm_dataframe",
116
  )
 
 
 
 
 
 
 
 
117
  with gr.TabItem("Pref Sets - Overview"):
118
  pref_sets_table = gr.Dataframe(
119
  prefs_data.values,
 
63
  def expand_subsets(dataframe):
64
  # TODO need to modify data/ script to do this
65
  pass
66
+
67
+ # reference for length bias categories
68
+ length_categories = {
69
+ 'alpacaeval-easy': 'True',
70
+ 'alpacaeval-hard': 'True',
71
+ 'alpacaeval-length': 'Neutral',
72
+ 'donotanswer': 'False',
73
+ 'hep-cpp': 'Neutral',
74
+ 'hep-go': 'Neutral',
75
+ 'hep-java': 'Neutral',
76
+ 'hep-js': 'Neutral',
77
+ 'hep-python': 'Neutral',
78
+ 'hep-rust': 'Neutral',
79
+ 'llmbar-adver-GPTInst': 'False',
80
+ 'llmbar-adver-GPTOut': 'Neutral',
81
+ 'llmbar-adver-manual': 'False',
82
+ 'llmbar-adver-neighbor': 'False',
83
+ 'llmbar-natural': 'Neutral',
84
+ 'mt-bench-easy': 'False',
85
+ 'mt-bench-hard': 'False',
86
+ 'mt-bench-med': 'Neutral',
87
+ 'refusals-dangerous': 'False',
88
+ 'refusals-offensive': 'False',
89
+ 'xstest-should-refuse': 'False',
90
+ 'xstest-should-respond': 'True'
91
+ }
92
+
93
+ def length_bias_check(dataframe):
94
+ """
95
+ Takes the raw herm dataframe and splits the data into new buckets according to length_categories.
96
+ Then, take the average of the three buckets as "average"
97
+ """
98
+ new_df = dataframe.copy()
99
+ existing_subsets = new_df.columns[2:]
100
+ final_subsets = ["Length Bias", "Neutral", "Terse Bias"]
101
+ # new data is empty list dict for each final subset
102
+ new_data = {s: [] for s in final_subsets}
103
+
104
+ # now, subsets correspond to those with True, Nuetral, and False length bias
105
+ # check if length_categories[subset] == "True" or "False" or "Neutral"
106
+ for subset in existing_subsets:
107
+ subset_data = new_df[subset].values
108
+ subset_length = length_categories[subset]
109
+ # route to the correct bucket
110
+ if subset_length == "True":
111
+ new_data["Length Bias"].append(subset_data)
112
+ elif subset_length == "Neutral":
113
+ new_data["Neutral"].append(subset_data)
114
+ elif subset_length == "False":
115
+ new_data["Terse Bias"].append(subset_data)
116
+
117
+ # take average of new_data and add to new_df (removing other columns than model)
118
+ for subset in final_subsets:
119
+ new_df[subset] = np.round(np.nanmean(new_data[subset], axis=0), 2)
120
+ keep_columns = ["model"] + final_subsets
121
+ new_df = new_df[keep_columns]
122
+ # recompute average
123
+ # new_df["average"] = np.round(np.nanmean(new_df[final_subsets].values, axis=1), 2)
124
+
125
+ return new_df
126
+
127
+
128
 
129
  herm_data = load_all_data(repo_dir_herm).sort_values(by='average', ascending=False)
130
  herm_data_avg = avg_over_herm(herm_data).sort_values(by='average', ascending=False)
131
+ herm_data_length = length_bias_check(herm_data).sort_values(by='Terse Bias', ascending=False)
132
  prefs_data = load_all_data(repo_dir_prefs).sort_values(by='average', ascending=False)
133
  # prefs_data_sub = expand_subsets(prefs_data).sort_values(by='average', ascending=False)
134
 
135
  col_types_herm = ["markdown"] + ["number"] * (len(herm_data.columns) - 1)
136
  col_types_herm_avg = ["markdown"] + ["number"] * (len(herm_data_avg.columns) - 1)
137
+ cols_herm_data_length = ["markdown"] + ["number"] * (len(herm_data_length.columns) - 1)
138
  col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
139
  # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
140
 
 
178
  headers=herm_data.columns.tolist(),
179
  elem_id="herm_dataframe",
180
  )
181
+ with gr.TabItem("HERM - Length Bias"):
182
+ with gr.Row():
183
+ herm_table = gr.Dataframe(
184
+ herm_data_length.values,
185
+ datatype=cols_herm_data_length,
186
+ headers=herm_data_length.columns.tolist(),
187
+ elem_id="herm_dataframe_length",
188
+ )
189
  with gr.TabItem("Pref Sets - Overview"):
190
  pref_sets_table = gr.Dataframe(
191
  prefs_data.values,
src/utils.py CHANGED
@@ -62,4 +62,9 @@ def load_all_data(data_repo, subsubsets=False): # use HF api to pull the git
62
  cols = list(df.columns)
63
  cols.insert(1, cols.pop(cols.index('average')))
64
  df = df.loc[:, cols]
 
 
 
 
 
65
  return df
 
62
  cols = list(df.columns)
63
  cols.insert(1, cols.pop(cols.index('average')))
64
  df = df.loc[:, cols]
65
+
66
+ # remove columns xstest (outdated data)
67
+ # if xstest is a column
68
+ if "xstest" in df.columns:
69
+ df = df.drop(columns=["xstest"])
70
  return df