pminervini committed on
Commit
dd383e8
1 Parent(s): cd6ab8f
src/__pycache__/envs.cpython-310.pyc DELETED
Binary file (1.11 kB)
 
src/__pycache__/populate.cpython-310.pyc DELETED
Binary file (2.78 kB)
 
src/display/__pycache__/about.cpython-310.pyc DELETED
Binary file (14.2 kB)
 
src/display/__pycache__/css_html_js.cpython-310.pyc DELETED
Binary file (2.07 kB)
 
src/display/__pycache__/formatting.cpython-310.pyc DELETED
Binary file (3.19 kB)
 
src/display/__pycache__/utils.cpython-310.pyc DELETED
Binary file (5.53 kB)
 
src/display/utils.py CHANGED
@@ -26,6 +26,7 @@ class AutoEvalColumn: # Auto evals column
26
  model_type_symbol = ColumnContent("T", "str", True, never_hidden=True)
27
  model = ColumnContent("Model", "markdown", True, never_hidden=True)
28
  average = ColumnContent("Average ⬆️", "number", True)
 
29
  arc = ColumnContent("ARC", "number", True)
30
  hellaswag = ColumnContent("HellaSwag", "number", True)
31
  mmlu = ColumnContent("MMLU", "number", True)
@@ -33,6 +34,8 @@ class AutoEvalColumn: # Auto evals column
33
  winogrande = ColumnContent("Winogrande", "number", True)
34
  gsm8k = ColumnContent("GSM8K", "number", True)
35
  drop = ColumnContent("DROP", "number", True)
 
 
36
  model_type = ColumnContent("Type", "str", False)
37
  architecture = ColumnContent("Architecture", "str", False)
38
  weight_type = ColumnContent("Weight type", "str", False, True)
 
26
  model_type_symbol = ColumnContent("T", "str", True, never_hidden=True)
27
  model = ColumnContent("Model", "markdown", True, never_hidden=True)
28
  average = ColumnContent("Average ⬆️", "number", True)
29
+
30
  arc = ColumnContent("ARC", "number", True)
31
  hellaswag = ColumnContent("HellaSwag", "number", True)
32
  mmlu = ColumnContent("MMLU", "number", True)
 
34
  winogrande = ColumnContent("Winogrande", "number", True)
35
  gsm8k = ColumnContent("GSM8K", "number", True)
36
  drop = ColumnContent("DROP", "number", True)
37
+ nqopen = ColumnContent("NQ Open", "number", True)
38
+
39
  model_type = ColumnContent("Type", "str", False)
40
  architecture = ColumnContent("Architecture", "str", False)
41
  weight_type = ColumnContent("Weight type", "str", False, True)
src/leaderboard/__pycache__/filter_models.cpython-310.pyc DELETED
Binary file (2.1 kB)
 
src/leaderboard/__pycache__/read_evals.cpython-310.pyc DELETED
Binary file (6.56 kB)
 
src/populate.py CHANGED
@@ -10,6 +10,10 @@ from src.leaderboard.read_evals import get_raw_eval_results
10
 
11
 
12
  def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
 
 
 
 
13
  raw_data = get_raw_eval_results(results_path, requests_path)
14
  all_data_json = [v.to_dict() for v in raw_data]
15
  all_data_json.append(baseline_row)
 
10
 
11
 
12
  def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
13
+ # Returns a list of EvalResult
14
+ # raw_data[0]:
15
+ # EvalResult(eval_name='EleutherAI_pythia-1.3b_torch.float16', full_model='EleutherAI/pythia-1.3b', org='EleutherAI', model='pythia-1.3b', revision='34b668ff0acfe56f2d541aa46b385557ee39eb3f', results={'arc:challenge': 31.14334470989761, 'hellaswag': 51.43397729535949, 'hendrycksTest': 26.55151159544371, 'truthfulqa:mc': 39.24322830092449, 'winogrande': 57.37963693764798, 'gsm8k': 0.9855951478392722, 'drop': 4.056312919463095}, precision='torch.float16', model_type=<ModelType.PT: ModelTypeDetails(name='pretrained', symbol='🟢')>, weight_type='Original', architecture='GPTNeoXForCausalLM', license='apache-2.0', likes=7, num_params=1.312, date='2023-09-09T10:52:17Z', still_on_hub=True)
16
+ # EvalResult and get_raw_eval_results are defined in ./src/leaderboard/read_evals.py, the results slots are not hardcoded
17
  raw_data = get_raw_eval_results(results_path, requests_path)
18
  all_data_json = [v.to_dict() for v in raw_data]
19
  all_data_json.append(baseline_row)
src/submission/__pycache__/check_validity.cpython-310.pyc DELETED
Binary file (4.25 kB)
 
src/submission/__pycache__/submit.cpython-310.pyc DELETED
Binary file (3.17 kB)
 
src/tools/__pycache__/collections.cpython-310.pyc DELETED
Binary file (2.57 kB)
 
src/tools/__pycache__/plots.cpython-310.pyc DELETED
Binary file (4.47 kB)