Spaces:
Runtime error
Runtime error
pminervini
committed on
Commit
•
dd383e8
1
Parent(s):
cd6ab8f
update
Browse files
- src/__pycache__/envs.cpython-310.pyc +0 -0
- src/__pycache__/populate.cpython-310.pyc +0 -0
- src/display/__pycache__/about.cpython-310.pyc +0 -0
- src/display/__pycache__/css_html_js.cpython-310.pyc +0 -0
- src/display/__pycache__/formatting.cpython-310.pyc +0 -0
- src/display/__pycache__/utils.cpython-310.pyc +0 -0
- src/display/utils.py +3 -0
- src/leaderboard/__pycache__/filter_models.cpython-310.pyc +0 -0
- src/leaderboard/__pycache__/read_evals.cpython-310.pyc +0 -0
- src/populate.py +4 -0
- src/submission/__pycache__/check_validity.cpython-310.pyc +0 -0
- src/submission/__pycache__/submit.cpython-310.pyc +0 -0
- src/tools/__pycache__/collections.cpython-310.pyc +0 -0
- src/tools/__pycache__/plots.cpython-310.pyc +0 -0
src/__pycache__/envs.cpython-310.pyc
DELETED
Binary file (1.11 kB)
|
|
src/__pycache__/populate.cpython-310.pyc
DELETED
Binary file (2.78 kB)
|
|
src/display/__pycache__/about.cpython-310.pyc
DELETED
Binary file (14.2 kB)
|
|
src/display/__pycache__/css_html_js.cpython-310.pyc
DELETED
Binary file (2.07 kB)
|
|
src/display/__pycache__/formatting.cpython-310.pyc
DELETED
Binary file (3.19 kB)
|
|
src/display/__pycache__/utils.cpython-310.pyc
DELETED
Binary file (5.53 kB)
|
|
src/display/utils.py
CHANGED
@@ -26,6 +26,7 @@ class AutoEvalColumn: # Auto evals column
|
|
26 |
model_type_symbol = ColumnContent("T", "str", True, never_hidden=True)
|
27 |
model = ColumnContent("Model", "markdown", True, never_hidden=True)
|
28 |
average = ColumnContent("Average ⬆️", "number", True)
|
|
|
29 |
arc = ColumnContent("ARC", "number", True)
|
30 |
hellaswag = ColumnContent("HellaSwag", "number", True)
|
31 |
mmlu = ColumnContent("MMLU", "number", True)
|
@@ -33,6 +34,8 @@ class AutoEvalColumn: # Auto evals column
|
|
33 |
winogrande = ColumnContent("Winogrande", "number", True)
|
34 |
gsm8k = ColumnContent("GSM8K", "number", True)
|
35 |
drop = ColumnContent("DROP", "number", True)
|
|
|
|
|
36 |
model_type = ColumnContent("Type", "str", False)
|
37 |
architecture = ColumnContent("Architecture", "str", False)
|
38 |
weight_type = ColumnContent("Weight type", "str", False, True)
|
|
|
26 |
model_type_symbol = ColumnContent("T", "str", True, never_hidden=True)
|
27 |
model = ColumnContent("Model", "markdown", True, never_hidden=True)
|
28 |
average = ColumnContent("Average ⬆️", "number", True)
|
29 |
+
|
30 |
arc = ColumnContent("ARC", "number", True)
|
31 |
hellaswag = ColumnContent("HellaSwag", "number", True)
|
32 |
mmlu = ColumnContent("MMLU", "number", True)
|
|
|
34 |
winogrande = ColumnContent("Winogrande", "number", True)
|
35 |
gsm8k = ColumnContent("GSM8K", "number", True)
|
36 |
drop = ColumnContent("DROP", "number", True)
|
37 |
+
nqopen = ColumnContent("NQ Open", "number", True)
|
38 |
+
|
39 |
model_type = ColumnContent("Type", "str", False)
|
40 |
architecture = ColumnContent("Architecture", "str", False)
|
41 |
weight_type = ColumnContent("Weight type", "str", False, True)
|
src/leaderboard/__pycache__/filter_models.cpython-310.pyc
DELETED
Binary file (2.1 kB)
|
|
src/leaderboard/__pycache__/read_evals.cpython-310.pyc
DELETED
Binary file (6.56 kB)
|
|
src/populate.py
CHANGED
@@ -10,6 +10,10 @@ from src.leaderboard.read_evals import get_raw_eval_results
|
|
10 |
|
11 |
|
12 |
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
|
|
|
|
|
|
|
|
13 |
raw_data = get_raw_eval_results(results_path, requests_path)
|
14 |
all_data_json = [v.to_dict() for v in raw_data]
|
15 |
all_data_json.append(baseline_row)
|
|
|
10 |
|
11 |
|
12 |
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> pd.DataFrame:
|
13 |
+
# Returns a list of EvalResult
|
14 |
+
# raw_data[0]:
|
15 |
+
# EvalResult(eval_name='EleutherAI_pythia-1.3b_torch.float16', full_model='EleutherAI/pythia-1.3b', org='EleutherAI', model='pythia-1.3b', revision='34b668ff0acfe56f2d541aa46b385557ee39eb3f', results={'arc:challenge': 31.14334470989761, 'hellaswag': 51.43397729535949, 'hendrycksTest': 26.55151159544371, 'truthfulqa:mc': 39.24322830092449, 'winogrande': 57.37963693764798, 'gsm8k': 0.9855951478392722, 'drop': 4.056312919463095}, precision='torch.float16', model_type=<ModelType.PT: ModelTypeDetails(name='pretrained', symbol='🟢')>, weight_type='Original', architecture='GPTNeoXForCausalLM', license='apache-2.0', likes=7, num_params=1.312, date='2023-09-09T10:52:17Z', still_on_hub=True)
|
16 |
+
# EvalResult and get_raw_eval_results are defined in ./src/leaderboard/read_evals.py, the results slots are not hardcoded
|
17 |
raw_data = get_raw_eval_results(results_path, requests_path)
|
18 |
all_data_json = [v.to_dict() for v in raw_data]
|
19 |
all_data_json.append(baseline_row)
|
src/submission/__pycache__/check_validity.cpython-310.pyc
DELETED
Binary file (4.25 kB)
|
|
src/submission/__pycache__/submit.cpython-310.pyc
DELETED
Binary file (3.17 kB)
|
|
src/tools/__pycache__/collections.cpython-310.pyc
DELETED
Binary file (2.57 kB)
|
|
src/tools/__pycache__/plots.cpython-310.pyc
DELETED
Binary file (4.47 kB)
|
|