Spaces:
Running
Running
natolambert
committed on
Commit
•
521165c
1
Parent(s):
8ac8bdc
rounding
Browse files
- app.py +6 -6
- src/utils.py +2 -2
app.py
CHANGED
@@ -52,7 +52,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
52 |
subset_cols = [col for col in new_df.columns if col in sub_subsets]
|
53 |
sub_data = new_df[subset_cols].values # take the relevant column values
|
54 |
sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
|
55 |
-
new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
|
56 |
# new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
57 |
|
58 |
data_cols = list(subset_mapping.keys())
|
@@ -65,7 +65,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
65 |
pref_data = dataframe_prefs[pref_columns].values
|
66 |
|
67 |
# add column test sets knowing the rows are not identical, take superset
|
68 |
-
dataframe_prefs["Prior Sets"] = np.round(np.nanmean(pref_data, axis=1), 2)
|
69 |
|
70 |
# add column Test Sets empty to new_df
|
71 |
new_df["Prior Sets"] = np.nan
|
@@ -83,7 +83,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
|
|
83 |
|
84 |
# add total average
|
85 |
data_cols += ["Prior Sets"]
|
86 |
-
new_df["average"] = np.round(np.nanmean(new_df[data_cols].values, axis=1), 2)
|
87 |
|
88 |
# make average third column
|
89 |
keep_columns = ["model", "model_type", "average"] + data_cols
|
@@ -207,15 +207,15 @@ def regex_table(dataframe, regex, filter_button):
|
|
207 |
with gr.Blocks(css=custom_css) as app:
|
208 |
# create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
|
209 |
with gr.Row():
|
210 |
-
with gr.Column(scale=4):
|
|
|
|
|
211 |
# search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
|
212 |
# filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
|
213 |
# img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
214 |
gr.Markdown("""
|
215 |
![](file/src/logo.png)
|
216 |
""")
|
217 |
-
with gr.Column(scale=6):
|
218 |
-
gr.Markdown(TOP_TEXT)
|
219 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
220 |
with gr.TabItem("🏆 RewardBench Leaderboard"):
|
221 |
with gr.Row():
|
|
|
52 |
subset_cols = [col for col in new_df.columns if col in sub_subsets]
|
53 |
sub_data = new_df[subset_cols].values # take the relevant column values
|
54 |
sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
|
55 |
+
new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 1) # take the weighted average
|
56 |
# new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
|
57 |
|
58 |
data_cols = list(subset_mapping.keys())
|
|
|
65 |
pref_data = dataframe_prefs[pref_columns].values
|
66 |
|
67 |
# add column test sets knowing the rows are not identical, take superset
|
68 |
+
dataframe_prefs["Prior Sets"] = np.round(np.nanmean(pref_data, axis=1), 1)
|
69 |
|
70 |
# add column Test Sets empty to new_df
|
71 |
new_df["Prior Sets"] = np.nan
|
|
|
83 |
|
84 |
# add total average
|
85 |
data_cols += ["Prior Sets"]
|
86 |
+
new_df["average"] = np.round(np.nanmean(new_df[data_cols].values, axis=1), 1)
|
87 |
|
88 |
# make average third column
|
89 |
keep_columns = ["model", "model_type", "average"] + data_cols
|
|
|
207 |
with gr.Blocks(css=custom_css) as app:
|
208 |
# create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
|
209 |
with gr.Row():
|
210 |
+
with gr.Column(scale=6):
|
211 |
+
gr.Markdown(TOP_TEXT)
|
212 |
+
with gr.Column(scale=4):
|
213 |
# search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
|
214 |
# filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
|
215 |
# img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
|
216 |
gr.Markdown("""
|
217 |
![](file/src/logo.png)
|
218 |
""")
|
|
|
|
|
219 |
with gr.Tabs(elem_classes="tab-buttons") as tabs:
|
220 |
with gr.TabItem("🏆 RewardBench Leaderboard"):
|
221 |
with gr.Row():
|
src/utils.py
CHANGED
@@ -97,8 +97,8 @@ def load_all_data(data_repo, subdir:str, subsubsets=False): # use HF api to p
|
|
97 |
cols.remove("pku_safer")
|
98 |
|
99 |
# round
|
100 |
-
df[cols] = df[cols].round(
|
101 |
-
avg = np.nanmean(df[cols].values,axis=1).round(
|
102 |
# add average column
|
103 |
df["average"] = avg
|
104 |
|
|
|
97 |
cols.remove("pku_safer")
|
98 |
|
99 |
# round
|
100 |
+
df[cols] = (df[cols]*100).round(1)
|
101 |
+
avg = np.nanmean(df[cols].values,axis=1).round(1)
|
102 |
# add average column
|
103 |
df["average"] = avg
|
104 |
|