natolambert committed on
Commit
e5d5995
1 Parent(s): 8e499f4

smol improvements

Browse files
Files changed (2) hide show
  1. app.py +38 -21
  2. requirements.txt +1 -0
app.py CHANGED
@@ -1,6 +1,7 @@
1
  import gradio as gr
2
  import os
3
  from huggingface_hub import HfApi, snapshot_download
 
4
  from datasets import load_dataset
5
  from src.utils import load_all_data
6
  from src.md import ABOUT_TEXT
@@ -15,10 +16,8 @@ eval_set_repo = "ai2-rlhf-collab/rm-benchmark-dev"
15
  repo_dir_herm = "./evals/herm/"
16
  repo_dir_prefs = "./evals/prefs/"
17
 
18
- # def restart_space():
19
- # api.restart_space(repo_id="ai2-rlhf-collab/rm-benchmark-viewer", token=COLLAB_TOKEN)
20
-
21
-
22
 
23
  print("Pulling evaluation results")
24
  repo = snapshot_download(
@@ -43,17 +42,18 @@ def avg_over_herm(dataframe):
43
  """
44
  Averages over the subsets alpacaeval, mt-bench, llmbar, refusals, hep and returns dataframe with only these columns.
45
  """
 
46
  subsets = ["alpacaeval", "mt-bench", "llmbar", "refusals", "hep"]
47
  # for each subset, avg the columns that have the subset in the column name, then add a new column with subset name and avg
48
  for subset in subsets:
49
- subset_cols = [col for col in dataframe.columns if subset in col]
50
- dataframe[subset] = np.round(np.nanmean(dataframe[subset_cols].values, axis=1), 2)
51
 
52
  keep_columns = ["model", "average"] + subsets
53
- dataframe = dataframe[keep_columns]
54
  # replace average column with new average
55
- dataframe["average"] = np.round(np.nanmean(dataframe[subsets].values, axis=1), 2)
56
- return dataframe
57
 
58
  def expand_subsets(dataframe):
59
  # TODO need to modify data/ script to do this
@@ -71,12 +71,23 @@ col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
71
 
72
  # for showing random samples
73
  eval_set = load_dataset(eval_set_repo, use_auth_token=COLLAB_TOKEN, split="filtered")
74
- def random_sample(r: gr.Request):
75
- sample_index = np.random.randint(0, len(eval_set) - 1)
76
- sample = eval_set[sample_index]
 
 
 
 
 
 
 
 
 
77
  markdown_text = '\n\n'.join([f"**{key}**: {value}" for key, value in sample.items()])
78
  return markdown_text
79
 
 
 
80
  with gr.Blocks() as app:
81
  # create tabs for the app, moving the current table to one titled "HERM" and the benchmark_text to a tab called "About"
82
  with gr.Row():
@@ -114,23 +125,29 @@ with gr.Blocks() as app:
114
  with gr.Row():
115
  # loads one sample
116
  gr.Markdown("## Random Dataset Sample Viewer")
 
117
  button = gr.Button("Show Random Sample")
118
 
119
  with gr.Row():
120
  sample_display = gr.Markdown("{sampled data loads here}")
121
 
122
- button.click(fn=random_sample, outputs=sample_display)
123
 
124
 
125
  # Load data when app starts, TODO make this used somewhere...
126
- def load_data_on_start():
127
- data_herm = load_all_data(repo_dir_herm)
128
- herm_table.update(data_herm)
 
 
 
 
 
 
129
 
130
- data_herm_avg = avg_over_herm(repo_dir_herm)
131
- herm_table.update(data_herm_avg)
 
132
 
133
- data_prefs = load_all_data(repo_dir_prefs)
134
- pref_sets_table.update(data_prefs)
135
 
136
- app.launch()
 
1
  import gradio as gr
2
  import os
3
  from huggingface_hub import HfApi, snapshot_download
4
+ from apscheduler.schedulers.background import BackgroundScheduler
5
  from datasets import load_dataset
6
  from src.utils import load_all_data
7
  from src.md import ABOUT_TEXT
 
16
  repo_dir_herm = "./evals/herm/"
17
  repo_dir_prefs = "./evals/prefs/"
18
 
19
+ def restart_space():
20
+ api.restart_space(repo_id="ai2-rlhf-collab/rm-benchmark-viewer", token=COLLAB_TOKEN)
 
 
21
 
22
  print("Pulling evaluation results")
23
  repo = snapshot_download(
 
42
  """
43
  Averages over the subsets alpacaeval, mt-bench, llmbar, refusals, hep and returns dataframe with only these columns.
44
  """
45
+ new_df = dataframe.copy()
46
  subsets = ["alpacaeval", "mt-bench", "llmbar", "refusals", "hep"]
47
  # for each subset, avg the columns that have the subset in the column name, then add a new column with subset name and avg
48
  for subset in subsets:
49
+ subset_cols = [col for col in new_df.columns if subset in col]
50
+ new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
51
 
52
  keep_columns = ["model", "average"] + subsets
53
+ new_df = new_df[keep_columns]
54
  # replace average column with new average
55
+ new_df["average"] = np.round(np.nanmean(new_df[subsets].values, axis=1), 2)
56
+ return new_df
57
 
58
  def expand_subsets(dataframe):
59
  # TODO need to modify data/ script to do this
 
71
 
72
  # for showing random samples
73
  eval_set = load_dataset(eval_set_repo, use_auth_token=COLLAB_TOKEN, split="filtered")
74
+ def random_sample(r: gr.Request, subset):
75
+ if subset is None or subset == []:
76
+ sample_index = np.random.randint(0, len(eval_set) - 1)
77
+ sample = eval_set[sample_index]
78
+ else: # filter by subsets (can be list)
79
+ if isinstance(subset, str):
80
+ subset = [subset]
81
+ # filter down dataset to only include the subset(s)
82
+ eval_set_filtered = eval_set.filter(lambda x: x["subset"] in subset)
83
+ sample_index = np.random.randint(0, len(eval_set_filtered) - 1)
84
+ sample = eval_set_filtered[sample_index]
85
+
86
  markdown_text = '\n\n'.join([f"**{key}**: {value}" for key, value in sample.items()])
87
  return markdown_text
88
 
89
+ subsets = eval_set.unique("subset")
90
+
91
  with gr.Blocks() as app:
92
  # create tabs for the app, moving the current table to one titled "HERM" and the benchmark_text to a tab called "About"
93
  with gr.Row():
 
125
  with gr.Row():
126
  # loads one sample
127
  gr.Markdown("## Random Dataset Sample Viewer")
128
+ subset_selector = gr.Dropdown(subsets, label="Subset", value=None, multiselect=True)
129
  button = gr.Button("Show Random Sample")
130
 
131
  with gr.Row():
132
  sample_display = gr.Markdown("{sampled data loads here}")
133
 
134
+ button.click(fn=random_sample, inputs=[subset_selector], outputs=[sample_display])
135
 
136
 
137
  # Load data when app starts, TODO make this used somewhere...
138
+ # def load_data_on_start():
139
+ # data_herm = load_all_data(repo_dir_herm)
140
+ # herm_table.update(data_herm)
141
+
142
+ # data_herm_avg = avg_over_herm(repo_dir_herm)
143
+ # herm_table.update(data_herm_avg)
144
+
145
+ # data_prefs = load_all_data(repo_dir_prefs)
146
+ # pref_sets_table.update(data_prefs)
147
 
148
+ scheduler = BackgroundScheduler()
149
+ scheduler.add_job(restart_space, "interval", seconds=10800) # restarted every 3h
150
+ scheduler.start()
151
 
 
 
152
 
153
+ app.queue().launch()
requirements.txt CHANGED
@@ -1,2 +1,3 @@
 
1
  pandas
2
  datasets
 
1
+ APScheduler==3.10.1
2
  pandas
3
  datasets