edbeeching committed
Commit fcb01e3
1 Parent(s): b2c063a

updates table to include revision

Files changed (2)
  1. app.py +3 -3
  2. utils.py +11 -6
app.py CHANGED
@@ -46,8 +46,8 @@ def load_results(model, benchmark, metric):
     return mean_acc, data["config"]["model_args"]
 
 
-COLS = ["eval_name", "total ⬆️", "ARC (25-shot) ⬆️", "HellaSwag (10-shot) ⬆️", "MMLU (5-shot) ⬆️", "TruthQA (0-shot) ⬆️", "base_model"]
-TYPES = ["str", "number", "number", "number", "number", "number","markdown", ]
+COLS = ["base_model", "revision", "8bit", "total ⬆️", "ARC (25-shot) ⬆️", "HellaSwag (10-shot) ⬆️", "MMLU (5-shot) ⬆️", "TruthQA (0-shot) ⬆️"]
+TYPES = ["markdown","str", "bool", "number", "number", "number", "number", "number", ]
 
 EVAL_COLS = ["model", "revision", "private", "8bit_eval", "is_delta_weight", "status"]
 EVAL_TYPES = ["markdown","str", "bool", "bool", "bool", "str"]
@@ -59,7 +59,7 @@ def get_leaderboard():
     all_data = get_eval_results_dicts()
     dataframe = pd.DataFrame.from_records(all_data)
     dataframe = dataframe.sort_values(by=['total ⬆️'], ascending=False)
-
+    print(dataframe)
     dataframe = dataframe[COLS]
     return dataframe
 
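COLS and TYPES are positional pairs consumed by the leaderboard table component (presumably a Gradio DataFrame, though the diff itself does not show where they are used), so the two lists must line up entry for entry; the old pair had the "markdown" type trailing at the end for base_model, which now leads the list. A quick alignment check, a minimal sketch using only the two lists from the new app.py:

    # COLS/TYPES copied from the new app.py above; the zip below just
    # verifies that every leaderboard column is paired with one display type.
    COLS = ["base_model", "revision", "8bit", "total ⬆️", "ARC (25-shot) ⬆️",
            "HellaSwag (10-shot) ⬆️", "MMLU (5-shot) ⬆️", "TruthQA (0-shot) ⬆️"]
    TYPES = ["markdown", "str", "bool", "number",
             "number", "number", "number", "number"]

    assert len(COLS) == len(TYPES)  # one display type per leaderboard column
    for col, typ in zip(COLS, TYPES):
        print(f"{typ:>8}  {col}")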
 
utils.py CHANGED
@@ -50,6 +50,7 @@ class EvalResult:
     eval_name : str
     org : str
     model : str
+    revision : str
     is_8bit : bool
     results : dict
 
@@ -60,8 +61,11 @@ class EvalResult:
         else:
             base_model =f"{self.model}"
         data_dict = {}
+
         data_dict["eval_name"] = self.eval_name
+        data_dict["8bit"] = self.is_8bit
         data_dict["base_model"] = make_clickable_model(base_model)
+        data_dict["revision"] = self.revision
         data_dict["total ⬆️"] = round(sum([v for k,v in self.results.items()]),3)
         data_dict["# params"] = get_n_params(base_model)
 
@@ -83,21 +87,22 @@ def parse_eval_result(json_filepath: str) -> Tuple[str, dict]:
 
     path_split = json_filepath.split("/")
     org = None
-    model = path_split[-3]
+    model = path_split[-4]
     is_8bit = path_split[-2] == "8bit"
-    if len(path_split)== 5:
+    revision = path_split[-3]
+    if len(path_split)== 6:
         # handles gpt2 type models that don't have an org
-        result_key = f"{path_split[-3]}_{path_split[-2]}"
-    else:
         result_key = f"{path_split[-4]}_{path_split[-3]}_{path_split[-2]}"
-        org = path_split[-4]
+    else:
+        result_key = f"{path_split[-5]}_{path_split[-4]}_{path_split[-3]}_{path_split[-2]}"
+        org = path_split[-5]
 
     eval_result = None
     for benchmark, metric in zip(BENCHMARKS, METRICS):
         if benchmark in json_filepath:
             accs = np.array([v[metric] for k, v in data["results"].items()])
             mean_acc = round(np.mean(accs),3)
-            eval_result = EvalResult(result_key, org, model, is_8bit, {benchmark:mean_acc})
+            eval_result = EvalResult(result_key, org, model, revision, is_8bit, {benchmark:mean_acc})
 
     return result_key, eval_result
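The reworked parse_eval_result expects one extra path component for the revision, which is why every negative index shifts by one and the no-org length check moves from 5 to 6. A minimal sketch of the branch logic with hypothetical file paths (the leading directory names and the 6-vs-7 component layout are inferred from the diff, not taken from the actual results repo):

    # Assumed layout: <dir>/<dir>/[org/]<model>/<revision>/<8bit|16bit>/<file>.json
    # The concrete paths below are made up for illustration.
    for json_filepath in [
        "evals/public/gpt2/main/16bit/arc.json",                # no org -> 6 parts
        "evals/public/EleutherAI/gpt-j-6b/main/8bit/arc.json",  # org    -> 7 parts
    ]:
        path_split = json_filepath.split("/")
        org = None
        model = path_split[-4]
        is_8bit = path_split[-2] == "8bit"
        revision = path_split[-3]
        if len(path_split) == 6:
            # gpt2-style models that don't have an org
            result_key = f"{path_split[-4]}_{path_split[-3]}_{path_split[-2]}"
        else:
            result_key = f"{path_split[-5]}_{path_split[-4]}_{path_split[-3]}_{path_split[-2]}"
            org = path_split[-5]
        print(result_key, org, model, revision, is_8bit)
    # gpt2_main_16bit None gpt2 main False
    # EleutherAI_gpt-j-6b_main_8bit EleutherAI gpt-j-6b main True

Because EvalResult is constructed positionally, the new revision field must sit between model and is_8bit in the dataclass, which is exactly where the diff inserts it: EvalResult(result_key, org, model, revision, is_8bit, {benchmark: mean_acc}) then lines up field for field.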