natolambert committed on
Commit
b7aaef4
1 Parent(s): 8799e00

add model type

Browse files
Files changed (2) hide show
  1. app.py +4 -4
  2. src/utils.py +9 -0
app.py CHANGED
@@ -52,7 +52,7 @@ def avg_over_herm(dataframe):
52
  new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
53
  # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
54
 
55
- keep_columns = ["model",] + list(subset_mapping.keys())
56
  # keep_columns = ["model", "average"] + subsets
57
  new_df = new_df[keep_columns]
58
 
@@ -69,7 +69,7 @@ def length_bias_check(dataframe):
69
  Then, take the average of the three buckets as "average"
70
  """
71
  new_df = dataframe.copy()
72
- existing_subsets = new_df.columns[2:]
73
  final_subsets = ["Length Bias", "Neutral", "Terse Bias"]
74
  # new data is empty list dict for each final subset
75
  new_data = {s: [] for s in final_subsets}
@@ -105,8 +105,8 @@ herm_data_length = length_bias_check(herm_data).sort_values(by='Terse Bias', asc
105
  prefs_data = load_all_data(repo_dir_herm, subdir="pref-sets").sort_values(by='average', ascending=False)
106
  # prefs_data_sub = expand_subsets(prefs_data).sort_values(by='average', ascending=False)
107
 
108
- col_types_herm = ["markdown"] + ["number"] * (len(herm_data.columns) - 1)
109
- col_types_herm_avg = ["markdown"] + ["number"] * (len(herm_data_avg.columns) - 1)
110
  cols_herm_data_length = ["markdown"] + ["number"] * (len(herm_data_length.columns) - 1)
111
  col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
112
  # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
 
52
  new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
53
  # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
54
 
55
+ keep_columns = ["model",] + ["model_type"] + list(subset_mapping.keys())
56
  # keep_columns = ["model", "average"] + subsets
57
  new_df = new_df[keep_columns]
58
 
 
69
  Then, take the average of the three buckets as "average"
70
  """
71
  new_df = dataframe.copy()
72
+ existing_subsets = new_df.columns[3:] # model, model_type, average
73
  final_subsets = ["Length Bias", "Neutral", "Terse Bias"]
74
  # new data is empty list dict for each final subset
75
  new_data = {s: [] for s in final_subsets}
 
105
  prefs_data = load_all_data(repo_dir_herm, subdir="pref-sets").sort_values(by='average', ascending=False)
106
  # prefs_data_sub = expand_subsets(prefs_data).sort_values(by='average', ascending=False)
107
 
108
+ col_types_herm = ["markdown"] + ["str"] + ["number"] * (len(herm_data.columns) - 1)
109
+ col_types_herm_avg = ["markdown"]+ ["str"] + ["number"] * (len(herm_data_avg.columns) - 1)
110
  cols_herm_data_length = ["markdown"] + ["number"] * (len(herm_data_length.columns) - 1)
111
  col_types_prefs = ["markdown"] + ["number"] * (len(prefs_data.columns) - 1)
112
  # col_types_prefs_sub = ["markdown"] + ["number"] * (len(prefs_data_sub.columns) - 1)
src/utils.py CHANGED
@@ -61,6 +61,9 @@ def load_all_data(data_repo, subdir:str, subsubsets=False): # use HF api to p
61
  # select all columns except "model"
62
  cols = df.columns.tolist()
63
  cols.remove("model")
 
 
 
64
  # remove model_beaker from dataframe
65
  if "model_beaker" in cols:
66
  cols.remove("model_beaker")
@@ -80,6 +83,12 @@ def load_all_data(data_repo, subdir:str, subsubsets=False): # use HF api to p
80
  cols.insert(1, cols.pop(cols.index('average')))
81
  df = df.loc[:, cols]
82
 
 
 
 
 
 
 
83
  # remove column xstest (outdated data)
84
  # if xstest is a column
85
  if "xstest" in df.columns:
 
61
  # select all columns except "model"
62
  cols = df.columns.tolist()
63
  cols.remove("model")
64
+ # if model_type is a column (pref tests may not have it)
65
+ if "model_type" in cols:
66
+ cols.remove("model_type")
67
  # remove model_beaker from dataframe
68
  if "model_beaker" in cols:
69
  cols.remove("model_beaker")
 
83
  cols.insert(1, cols.pop(cols.index('average')))
84
  df = df.loc[:, cols]
85
 
86
+ # move model_type column to the second position (index 1)
87
+ if "model_type" in cols:
88
+ cols = list(df.columns)
89
+ cols.insert(1, cols.pop(cols.index('model_type')))
90
+ df = df.loc[:, cols]
91
+
92
  # remove column xstest (outdated data)
93
  # if xstest is a column
94
  if "xstest" in df.columns: