Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
pminervini
committed on
Commit
·
717e20e
1
Parent(s):
b0b4782
update
Browse files - src/populate.py +4 -3
src/populate.py
CHANGED
@@ -6,10 +6,10 @@ import pandas as pd
|
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
8 |
from src.leaderboard.filter_models import filter_models
|
9 |
-
from src.leaderboard.read_evals import get_raw_eval_results
|
10 |
|
11 |
|
12 |
-
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> tuple[
|
13 |
# Returns a list of EvalResult
|
14 |
# raw_data[0]:
|
15 |
# EvalResult(eval_name='EleutherAI_pythia-1.3b_torch.float16', full_model='EleutherAI/pythia-1.3b', org='EleutherAI', model='pythia-1.3b', revision='34b668ff0acfe56f2d541aa46b385557ee39eb3f', results={'arc:challenge': 31.14334470989761, 'hellaswag': 51.43397729535949, 'hendrycksTest': 26.55151159544371, 'truthfulqa:mc': 39.24322830092449, 'winogrande': 57.37963693764798, 'gsm8k': 0.9855951478392722, 'drop': 4.056312919463095}, precision='torch.float16', model_type=<ModelType.PT: ModelTypeDetails(name='pretrained', symbol='🟢')>, weight_type='Original', architecture='GPTNeoXForCausalLM', license='apache-2.0', likes=7, num_params=1.312, date='2023-09-09T10:52:17Z', still_on_hub=True)
|
@@ -20,7 +20,8 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
|
|
20 |
filter_models(all_data_json)
|
21 |
|
22 |
df = pd.DataFrame.from_records(all_data_json)
|
23 |
-
|
|
|
24 |
df = df[cols].round(decimals=2)
|
25 |
|
26 |
# filter out if any of the benchmarks have not been produced
|
|
|
6 |
from src.display.formatting import has_no_nan_values, make_clickable_model
|
7 |
from src.display.utils import AutoEvalColumn, EvalQueueColumn
|
8 |
from src.leaderboard.filter_models import filter_models
|
9 |
+
from src.leaderboard.read_evals import get_raw_eval_results, EvalResult
|
10 |
|
11 |
|
12 |
+
def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchmark_cols: list) -> tuple[list[EvalResult], pd.DataFrame]:
|
13 |
# Returns a list of EvalResult
|
14 |
# raw_data[0]:
|
15 |
# EvalResult(eval_name='EleutherAI_pythia-1.3b_torch.float16', full_model='EleutherAI/pythia-1.3b', org='EleutherAI', model='pythia-1.3b', revision='34b668ff0acfe56f2d541aa46b385557ee39eb3f', results={'arc:challenge': 31.14334470989761, 'hellaswag': 51.43397729535949, 'hendrycksTest': 26.55151159544371, 'truthfulqa:mc': 39.24322830092449, 'winogrande': 57.37963693764798, 'gsm8k': 0.9855951478392722, 'drop': 4.056312919463095}, precision='torch.float16', model_type=<ModelType.PT: ModelTypeDetails(name='pretrained', symbol='🟢')>, weight_type='Original', architecture='GPTNeoXForCausalLM', license='apache-2.0', likes=7, num_params=1.312, date='2023-09-09T10:52:17Z', still_on_hub=True)
|
|
|
20 |
filter_models(all_data_json)
|
21 |
|
22 |
df = pd.DataFrame.from_records(all_data_json)
|
23 |
+
if AutoEvalColumn.average.name in df:
|
24 |
+
df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
|
25 |
df = df[cols].round(decimals=2)
|
26 |
|
27 |
# filter out if any of the benchmarks have not been produced
|