{
    "config_general": {
        "lighteval_sha": "?",
        "num_fewshot_seeds": 1,
        "override_batch_size": 1,
        "max_samples": null,
        "job_id": "",
        "start_time": 3433270.983105714,
        "end_time": 3441459.889054895,
        "total_evaluation_time_secondes": "8188.905949180946",
        "model_name": "edbeeching/mixtral-8x7b-instruct-v0.1_merged",
        "model_sha": "6da964ddd4ac2f1edfa95d2a72082d87f1007cbf",
        "model_dtype": "torch.bfloat16",
        "model_size": "87.49 GB",
        "config": null
    },
    "results": {
        "leaderboard|mmlu:abstract_algebra|5": {
            "acc": 0.35,
            "acc_stderr": 0.0479372485441102
        },
        "leaderboard|mmlu:anatomy|5": {
            "acc": 0.362962962962963,
            "acc_stderr": 0.04153948404742398
        },
        "leaderboard|mmlu:astronomy|5": {
            "acc": 0.40789473684210525,
            "acc_stderr": 0.03999309712777471
        },
        "leaderboard|mmlu:business_ethics|5": {
            "acc": 0.4,
            "acc_stderr": 0.049236596391733084
        },
        "leaderboard|mmlu:clinical_knowledge|5": {
            "acc": 0.4075471698113208,
            "acc_stderr": 0.030242233800854494
        },
        "leaderboard|mmlu:college_biology|5": {
            "acc": 0.3125,
            "acc_stderr": 0.038760854559127644
        },
        "leaderboard|mmlu:college_chemistry|5": {
            "acc": 0.26,
            "acc_stderr": 0.0440844002276808
        },
        "leaderboard|mmlu:college_computer_science|5": {
            "acc": 0.36,
            "acc_stderr": 0.048241815132442176
        },
        "leaderboard|mmlu:college_mathematics|5": {
            "acc": 0.28,
            "acc_stderr": 0.04512608598542127
        },
        "leaderboard|mmlu:college_medicine|5": {
            "acc": 0.34104046242774566,
            "acc_stderr": 0.03614665424180826
        },
        "leaderboard|mmlu:college_physics|5": {
            "acc": 0.20588235294117646,
            "acc_stderr": 0.04023382273617748
        },
        "leaderboard|mmlu:computer_security|5": {
            "acc": 0.46,
            "acc_stderr": 0.05009082659620333
        },
        "leaderboard|mmlu:conceptual_physics|5": {
            "acc": 0.3446808510638298,
            "acc_stderr": 0.031068985963122155
        },
        "leaderboard|mmlu:econometrics|5": {
            "acc": 0.3157894736842105,
            "acc_stderr": 0.04372748290278007
        },
        "leaderboard|mmlu:electrical_engineering|5": {
            "acc": 0.3448275862068966,
            "acc_stderr": 0.039609335494512087
        },
        "leaderboard|mmlu:elementary_mathematics|5": {
            "acc": 0.2777777777777778,
            "acc_stderr": 0.023068188848261104
        },
        "leaderboard|mmlu:formal_logic|5": {
            "acc": 0.30952380952380953,
            "acc_stderr": 0.04134913018303316
        },
        "leaderboard|mmlu:global_facts|5": {
            "acc": 0.4,
            "acc_stderr": 0.049236596391733084
        },
        "leaderboard|mmlu:high_school_biology|5": {
            "acc": 0.4032258064516129,
            "acc_stderr": 0.027906150826041143
        },
        "leaderboard|mmlu:high_school_chemistry|5": {
            "acc": 0.32019704433497537,
            "acc_stderr": 0.032826493853041504
        },
        "leaderboard|mmlu:high_school_computer_science|5": {
            "acc": 0.41,
            "acc_stderr": 0.04943110704237102
        },
        "leaderboard|mmlu:high_school_european_history|5": {
            "acc": 0.41818181818181815,
            "acc_stderr": 0.03851716319398395
        },
        "leaderboard|mmlu:high_school_geography|5": {
            "acc": 0.4696969696969697,
            "acc_stderr": 0.03555804051763929
        },
        "leaderboard|mmlu:high_school_government_and_politics|5": {
            "acc": 0.5025906735751295,
            "acc_stderr": 0.03608390745384487
        },
        "leaderboard|mmlu:high_school_macroeconomics|5": {
            "acc": 0.36153846153846153,
            "acc_stderr": 0.024359581465396987
        },
        "leaderboard|mmlu:high_school_mathematics|5": {
            "acc": 0.29259259259259257,
            "acc_stderr": 0.027738969632176088
        },
        "leaderboard|mmlu:high_school_microeconomics|5": {
            "acc": 0.3277310924369748,
            "acc_stderr": 0.030489911417673227
        },
        "leaderboard|mmlu:high_school_physics|5": {
            "acc": 0.24503311258278146,
            "acc_stderr": 0.03511807571804723
        },
        "leaderboard|mmlu:high_school_psychology|5": {
            "acc": 0.3926605504587156,
            "acc_stderr": 0.020937505161201093
        },
        "leaderboard|mmlu:high_school_statistics|5": {
            "acc": 0.3055555555555556,
            "acc_stderr": 0.03141554629402543
        },
        "leaderboard|mmlu:high_school_us_history|5": {
            "acc": 0.36764705882352944,
            "acc_stderr": 0.03384132045674119
        },
        "leaderboard|mmlu:high_school_world_history|5": {
            "acc": 0.48523206751054854,
            "acc_stderr": 0.032533028078777386
        },
        "leaderboard|mmlu:human_aging|5": {
            "acc": 0.3991031390134529,
            "acc_stderr": 0.03286745312567961
        },
        "leaderboard|mmlu:human_sexuality|5": {
            "acc": 0.3435114503816794,
            "acc_stderr": 0.041649760719448786
        },
        "leaderboard|mmlu:international_law|5": {
            "acc": 0.48760330578512395,
            "acc_stderr": 0.045629515481807666
        },
        "leaderboard|mmlu:jurisprudence|5": {
            "acc": 0.3333333333333333,
            "acc_stderr": 0.04557239513497751
        },
        "leaderboard|mmlu:logical_fallacies|5": {
            "acc": 0.3128834355828221,
            "acc_stderr": 0.03642914578292405
        },
        "leaderboard|mmlu:machine_learning|5": {
            "acc": 0.29464285714285715,
            "acc_stderr": 0.04327040932578729
        },
        "leaderboard|mmlu:management|5": {
            "acc": 0.3883495145631068,
            "acc_stderr": 0.0482572933735639
        },
        "leaderboard|mmlu:marketing|5": {
            "acc": 0.5470085470085471,
            "acc_stderr": 0.0326109987309862
        },
        "leaderboard|mmlu:medical_genetics|5": {
            "acc": 0.32,
            "acc_stderr": 0.046882617226215034
        },
        "leaderboard|mmlu:miscellaneous|5": {
            "acc": 0.46871008939974457,
            "acc_stderr": 0.017844918090468547
        },
        "leaderboard|mmlu:moral_disputes|5": {
            "acc": 0.3554913294797688,
            "acc_stderr": 0.025770292082977254
        },
        "leaderboard|mmlu:moral_scenarios|5": {
            "acc": 0.2446927374301676,
            "acc_stderr": 0.014378169884098433
        },
        "leaderboard|mmlu:nutrition|5": {
            "acc": 0.34967320261437906,
            "acc_stderr": 0.0273053080762747
        },
        "leaderboard|mmlu:philosophy|5": {
            "acc": 0.3633440514469453,
            "acc_stderr": 0.027316847674192714
        },
        "leaderboard|mmlu:prehistory|5": {
            "acc": 0.39197530864197533,
            "acc_stderr": 0.027163686038271233
        },
        "leaderboard|mmlu:professional_accounting|5": {
            "acc": 0.32978723404255317,
            "acc_stderr": 0.028045946942042398
        },
        "leaderboard|mmlu:professional_law|5": {
            "acc": 0.30378096479791394,
            "acc_stderr": 0.011745787720472472
        },
        "leaderboard|mmlu:professional_medicine|5": {
            "acc": 0.3088235294117647,
            "acc_stderr": 0.028064998167040094
        },
        "leaderboard|mmlu:professional_psychology|5": {
            "acc": 0.3202614379084967,
            "acc_stderr": 0.01887568293806945
        },
        "leaderboard|mmlu:public_relations|5": {
            "acc": 0.44545454545454544,
            "acc_stderr": 0.047605488214603246
        },
        "leaderboard|mmlu:security_studies|5": {
            "acc": 0.30612244897959184,
            "acc_stderr": 0.029504896454595957
        },
        "leaderboard|mmlu:sociology|5": {
            "acc": 0.40298507462686567,
            "acc_stderr": 0.034683432951111266
        },
        "leaderboard|mmlu:us_foreign_policy|5": {
            "acc": 0.5,
            "acc_stderr": 0.050251890762960605
        },
        "leaderboard|mmlu:virology|5": {
            "acc": 0.27710843373493976,
            "acc_stderr": 0.03484331592680588
        },
        "leaderboard|mmlu:world_religions|5": {
            "acc": 0.4619883040935672,
            "acc_stderr": 0.03823727092882307
        },
        "leaderboard|mmlu:_average|5": {
            "acc": 0.36313937301501126,
            "acc_stderr": 0.035636090562093964
        },
        "all": {
            "acc": 0.36313937301501126,
            "acc_stderr": 0.035636090562093964
        }
    },
    "versions": {
        "leaderboard|mmlu:abstract_algebra|5": 0,
        "leaderboard|mmlu:anatomy|5": 0,
        "leaderboard|mmlu:astronomy|5": 0,
        "leaderboard|mmlu:business_ethics|5": 0,
        "leaderboard|mmlu:clinical_knowledge|5": 0,
        "leaderboard|mmlu:college_biology|5": 0,
        "leaderboard|mmlu:college_chemistry|5": 0,
        "leaderboard|mmlu:college_computer_science|5": 0,
        "leaderboard|mmlu:college_mathematics|5": 0,
        "leaderboard|mmlu:college_medicine|5": 0,
        "leaderboard|mmlu:college_physics|5": 0,
        "leaderboard|mmlu:computer_security|5": 0,
        "leaderboard|mmlu:conceptual_physics|5": 0,
        "leaderboard|mmlu:econometrics|5": 0,
        "leaderboard|mmlu:electrical_engineering|5": 0,
        "leaderboard|mmlu:elementary_mathematics|5": 0,
        "leaderboard|mmlu:formal_logic|5": 0,
        "leaderboard|mmlu:global_facts|5": 0,
        "leaderboard|mmlu:high_school_biology|5": 0,
        "leaderboard|mmlu:high_school_chemistry|5": 0,
        "leaderboard|mmlu:high_school_computer_science|5": 0,
        "leaderboard|mmlu:high_school_european_history|5": 0,
        "leaderboard|mmlu:high_school_geography|5": 0,
        "leaderboard|mmlu:high_school_government_and_politics|5": 0,
        "leaderboard|mmlu:high_school_macroeconomics|5": 0,
        "leaderboard|mmlu:high_school_mathematics|5": 0,
        "leaderboard|mmlu:high_school_microeconomics|5": 0,
        "leaderboard|mmlu:high_school_physics|5": 0,
        "leaderboard|mmlu:high_school_psychology|5": 0,
        "leaderboard|mmlu:high_school_statistics|5": 0,
        "leaderboard|mmlu:high_school_us_history|5": 0,
        "leaderboard|mmlu:high_school_world_history|5": 0,
        "leaderboard|mmlu:human_aging|5": 0,
        "leaderboard|mmlu:human_sexuality|5": 0,
        "leaderboard|mmlu:international_law|5": 0,
        "leaderboard|mmlu:jurisprudence|5": 0,
        "leaderboard|mmlu:logical_fallacies|5": 0,
        "leaderboard|mmlu:machine_learning|5": 0,
        "leaderboard|mmlu:management|5": 0,
        "leaderboard|mmlu:marketing|5": 0,
        "leaderboard|mmlu:medical_genetics|5": 0,
        "leaderboard|mmlu:miscellaneous|5": 0,
        "leaderboard|mmlu:moral_disputes|5": 0,
        "leaderboard|mmlu:moral_scenarios|5": 0,
        "leaderboard|mmlu:nutrition|5": 0,
        "leaderboard|mmlu:philosophy|5": 0,
        "leaderboard|mmlu:prehistory|5": 0,
        "leaderboard|mmlu:professional_accounting|5": 0,
        "leaderboard|mmlu:professional_law|5": 0,
        "leaderboard|mmlu:professional_medicine|5": 0,
        "leaderboard|mmlu:professional_psychology|5": 0,
        "leaderboard|mmlu:public_relations|5": 0,
        "leaderboard|mmlu:security_studies|5": 0,
        "leaderboard|mmlu:sociology|5": 0,
        "leaderboard|mmlu:us_foreign_policy|5": 0,
        "leaderboard|mmlu:virology|5": 0,
        "leaderboard|mmlu:world_religions|5": 0
    },
    "config_tasks": {
        "leaderboard|mmlu:abstract_algebra": {
            "name": "mmlu:abstract_algebra",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "abstract_algebra",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:anatomy": {
            "name": "mmlu:anatomy",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "anatomy",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 135,
            "effective_num_docs": 135,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:astronomy": {
            "name": "mmlu:astronomy",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "astronomy",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 152,
            "effective_num_docs": 152,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:business_ethics": {
            "name": "mmlu:business_ethics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "business_ethics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:clinical_knowledge": {
            "name": "mmlu:clinical_knowledge",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "clinical_knowledge",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 265,
            "effective_num_docs": 265,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:college_biology": {
            "name": "mmlu:college_biology",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "college_biology",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 144,
            "effective_num_docs": 144,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:college_chemistry": {
            "name": "mmlu:college_chemistry",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "college_chemistry",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:college_computer_science": {
            "name": "mmlu:college_computer_science",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "college_computer_science",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:college_mathematics": {
            "name": "mmlu:college_mathematics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "college_mathematics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:college_medicine": {
            "name": "mmlu:college_medicine",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "college_medicine",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 173,
            "effective_num_docs": 173,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:college_physics": {
            "name": "mmlu:college_physics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "college_physics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 102,
            "effective_num_docs": 102,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:computer_security": {
            "name": "mmlu:computer_security",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "computer_security",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:conceptual_physics": {
            "name": "mmlu:conceptual_physics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "conceptual_physics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 235,
            "effective_num_docs": 235,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:econometrics": {
            "name": "mmlu:econometrics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "econometrics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 114,
            "effective_num_docs": 114,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:electrical_engineering": {
            "name": "mmlu:electrical_engineering",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "electrical_engineering",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 145,
            "effective_num_docs": 145,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:elementary_mathematics": {
            "name": "mmlu:elementary_mathematics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "elementary_mathematics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 378,
            "effective_num_docs": 378,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:formal_logic": {
            "name": "mmlu:formal_logic",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "formal_logic",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 126,
            "effective_num_docs": 126,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:global_facts": {
            "name": "mmlu:global_facts",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "global_facts",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_biology": {
            "name": "mmlu:high_school_biology",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_biology",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 310,
            "effective_num_docs": 310,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_chemistry": {
            "name": "mmlu:high_school_chemistry",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_chemistry",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 203,
            "effective_num_docs": 203,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_computer_science": {
            "name": "mmlu:high_school_computer_science",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_computer_science",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_european_history": {
            "name": "mmlu:high_school_european_history",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_european_history",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 165,
            "effective_num_docs": 165,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_geography": {
            "name": "mmlu:high_school_geography",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_geography",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 198,
            "effective_num_docs": 198,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_government_and_politics": {
            "name": "mmlu:high_school_government_and_politics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_government_and_politics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 193,
            "effective_num_docs": 193,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_macroeconomics": {
            "name": "mmlu:high_school_macroeconomics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_macroeconomics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 390,
            "effective_num_docs": 390,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_mathematics": {
            "name": "mmlu:high_school_mathematics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_mathematics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 270,
            "effective_num_docs": 270,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_microeconomics": {
            "name": "mmlu:high_school_microeconomics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_microeconomics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 238,
            "effective_num_docs": 238,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_physics": {
            "name": "mmlu:high_school_physics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_physics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 151,
            "effective_num_docs": 151,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_psychology": {
            "name": "mmlu:high_school_psychology",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_psychology",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 545,
            "effective_num_docs": 545,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_statistics": {
            "name": "mmlu:high_school_statistics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_statistics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 216,
            "effective_num_docs": 216,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_us_history": {
            "name": "mmlu:high_school_us_history",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_us_history",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 204,
            "effective_num_docs": 204,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:high_school_world_history": {
            "name": "mmlu:high_school_world_history",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "high_school_world_history",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 237,
            "effective_num_docs": 237,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:human_aging": {
            "name": "mmlu:human_aging",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "human_aging",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 223,
            "effective_num_docs": 223,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:human_sexuality": {
            "name": "mmlu:human_sexuality",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "human_sexuality",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 131,
            "effective_num_docs": 131,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:international_law": {
            "name": "mmlu:international_law",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "international_law",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 121,
            "effective_num_docs": 121,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:jurisprudence": {
            "name": "mmlu:jurisprudence",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "jurisprudence",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 108,
            "effective_num_docs": 108,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:logical_fallacies": {
            "name": "mmlu:logical_fallacies",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "logical_fallacies",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 163,
            "effective_num_docs": 163,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:machine_learning": {
            "name": "mmlu:machine_learning",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "machine_learning",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 112,
            "effective_num_docs": 112,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:management": {
            "name": "mmlu:management",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "management",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 103,
            "effective_num_docs": 103,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:marketing": {
            "name": "mmlu:marketing",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "marketing",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 234,
            "effective_num_docs": 234,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:medical_genetics": {
            "name": "mmlu:medical_genetics",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "medical_genetics",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:miscellaneous": {
            "name": "mmlu:miscellaneous",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "miscellaneous",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 783,
            "effective_num_docs": 783,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:moral_disputes": {
            "name": "mmlu:moral_disputes",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "moral_disputes",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 346,
            "effective_num_docs": 346,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:moral_scenarios": {
            "name": "mmlu:moral_scenarios",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "moral_scenarios",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 895,
            "effective_num_docs": 895,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:nutrition": {
            "name": "mmlu:nutrition",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "nutrition",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 306,
            "effective_num_docs": 306,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:philosophy": {
            "name": "mmlu:philosophy",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "philosophy",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 311,
            "effective_num_docs": 311,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:prehistory": {
            "name": "mmlu:prehistory",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "prehistory",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 324,
            "effective_num_docs": 324,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:professional_accounting": {
            "name": "mmlu:professional_accounting",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "professional_accounting",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 282,
            "effective_num_docs": 282,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:professional_law": {
            "name": "mmlu:professional_law",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "professional_law",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 1534,
            "effective_num_docs": 1534,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:professional_medicine": {
            "name": "mmlu:professional_medicine",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "professional_medicine",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 272,
            "effective_num_docs": 272,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:professional_psychology": {
            "name": "mmlu:professional_psychology",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "professional_psychology",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 612,
            "effective_num_docs": 612,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:public_relations": {
            "name": "mmlu:public_relations",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "public_relations",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 110,
            "effective_num_docs": 110,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:security_studies": {
            "name": "mmlu:security_studies",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "security_studies",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 245,
            "effective_num_docs": 245,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:sociology": {
            "name": "mmlu:sociology",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "sociology",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 201,
            "effective_num_docs": 201,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:us_foreign_policy": {
            "name": "mmlu:us_foreign_policy",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "us_foreign_policy",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 100,
            "effective_num_docs": 100,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:virology": {
            "name": "mmlu:virology",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "virology",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 166,
            "effective_num_docs": 166,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        },
        "leaderboard|mmlu:world_religions": {
            "name": "mmlu:world_religions",
            "prompt_function": "mmlu_harness",
            "hf_repo": "lighteval/mmlu",
            "hf_subset": "world_religions",
            "metric": [
                "loglikelihood_acc"
            ],
            "hf_avail_splits": [
                "auxiliary_train",
                "test",
                "validation",
                "dev"
            ],
            "evaluation_splits": [
                "test"
            ],
            "few_shots_split": "dev",
            "few_shots_select": "sequential",
            "generation_size": 1,
            "stop_sequence": [
                "\n"
            ],
            "output_regex": null,
            "frozen": false,
            "suite": [
                "leaderboard",
                "mmlu"
            ],
            "original_num_docs": 171,
            "effective_num_docs": 171,
            "trust_dataset": true,
            "must_remove_duplicate_docs": null
        }
    },
    "summary_tasks": {
        "leaderboard|mmlu:abstract_algebra|5": {
            "hashes": {
                "hash_examples": "4c76229e00c9c0e9",
                "hash_full_prompts": "8df48d575e35cf21",
                "hash_input_tokens": "f60208a8d534afe8",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:anatomy|5": {
            "hashes": {
                "hash_examples": "6a1f8104dccbd33b",
                "hash_full_prompts": "93984c7ca5fec382",
                "hash_input_tokens": "b99dfc56ac0f8094",
                "hash_cont_tokens": "a52a4f60d98cbe5c"
            },
            "truncated": 0,
            "non_truncated": 135,
            "padded": 540,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:astronomy|5": {
            "hashes": {
                "hash_examples": "1302effa3a76ce4c",
                "hash_full_prompts": "70f7607490c705be",
                "hash_input_tokens": "10e61e265e540280",
                "hash_cont_tokens": "10f7d8eeba97841d"
            },
            "truncated": 0,
            "non_truncated": 152,
            "padded": 608,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:business_ethics|5": {
            "hashes": {
                "hash_examples": "03cb8bce5336419a",
                "hash_full_prompts": "ba51f38e55d7ab6d",
                "hash_input_tokens": "ba6ddd52f2d00742",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:clinical_knowledge|5": {
            "hashes": {
                "hash_examples": "ffbb9c7b2be257f9",
                "hash_full_prompts": "b1c66a79a61290ed",
                "hash_input_tokens": "f893d5bf1e451105",
                "hash_cont_tokens": "edef9975ba9165b5"
            },
            "truncated": 0,
            "non_truncated": 265,
            "padded": 1060,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:college_biology|5": {
            "hashes": {
                "hash_examples": "3ee77f176f38eb8e",
                "hash_full_prompts": "77e6a0aec610d422",
                "hash_input_tokens": "bd2c95a882ea978f",
                "hash_cont_tokens": "0aa103ec6602280b"
            },
            "truncated": 0,
            "non_truncated": 144,
            "padded": 576,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:college_chemistry|5": {
            "hashes": {
                "hash_examples": "ce61a69c46d47aeb",
                "hash_full_prompts": "af7c29a0245adcf6",
                "hash_input_tokens": "76c9b6da71ab77f4",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:college_computer_science|5": {
            "hashes": {
                "hash_examples": "32805b52d7d5daab",
                "hash_full_prompts": "722a131b59e8f2a6",
                "hash_input_tokens": "15a5e64c2e774f1e",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:college_mathematics|5": {
            "hashes": {
                "hash_examples": "55da1a0a0bd33722",
                "hash_full_prompts": "4ef0b215b7cf059f",
                "hash_input_tokens": "daf3ef12482b8078",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:college_medicine|5": {
            "hashes": {
                "hash_examples": "c33e143163049176",
                "hash_full_prompts": "286b5980645a2841",
                "hash_input_tokens": "61b1df42bb916377",
                "hash_cont_tokens": "1979021dbc698754"
            },
            "truncated": 0,
            "non_truncated": 173,
            "padded": 692,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:college_physics|5": {
            "hashes": {
                "hash_examples": "ebdab1cdb7e555df",
                "hash_full_prompts": "afd1f6acc7205218",
                "hash_input_tokens": "c4667120c0420859",
                "hash_cont_tokens": "7cf7fe2bab00acbd"
            },
            "truncated": 0,
            "non_truncated": 102,
            "padded": 408,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:computer_security|5": {
            "hashes": {
                "hash_examples": "a24fd7d08a560921",
                "hash_full_prompts": "34ec751656775d82",
                "hash_input_tokens": "b3ab68602e5760c4",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:conceptual_physics|5": {
            "hashes": {
                "hash_examples": "8300977a79386993",
                "hash_full_prompts": "284073e2d3b24d90",
                "hash_input_tokens": "0e76f509a1db508f",
                "hash_cont_tokens": "903f64eed2b0d217"
            },
            "truncated": 0,
            "non_truncated": 235,
            "padded": 940,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:econometrics|5": {
            "hashes": {
                "hash_examples": "ddde36788a04a46f",
                "hash_full_prompts": "a32778a5446b4dab",
                "hash_input_tokens": "72d27208825bed01",
                "hash_cont_tokens": "721ae6c5302c4bf2"
            },
            "truncated": 0,
            "non_truncated": 114,
            "padded": 455,
            "non_padded": 1,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:electrical_engineering|5": {
            "hashes": {
                "hash_examples": "acbc5def98c19b3f",
                "hash_full_prompts": "b12325c15917c049",
                "hash_input_tokens": "265f1f8f1d298494",
                "hash_cont_tokens": "15a738960ed3e587"
            },
            "truncated": 0,
            "non_truncated": 145,
            "padded": 580,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:elementary_mathematics|5": {
            "hashes": {
                "hash_examples": "146e61d07497a9bd",
                "hash_full_prompts": "4f9860a5fb6f7609",
                "hash_input_tokens": "d3067fe8df78d2d0",
                "hash_cont_tokens": "c96470462fc71683"
            },
            "truncated": 0,
            "non_truncated": 378,
            "padded": 1512,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:formal_logic|5": {
            "hashes": {
                "hash_examples": "8635216e1909a03f",
                "hash_full_prompts": "01b914ccbb39a142",
                "hash_input_tokens": "700bbf7f0aedd716",
                "hash_cont_tokens": "0e1ce025c9d6ee7e"
            },
            "truncated": 0,
            "non_truncated": 126,
            "padded": 500,
            "non_padded": 4,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:global_facts|5": {
            "hashes": {
                "hash_examples": "30b315aa6353ee47",
                "hash_full_prompts": "812f3767d82991c2",
                "hash_input_tokens": "f8a3865a79eaed7d",
                "hash_cont_tokens": "17b868b63507f9a3"
            },
            "truncated": 0,
            "non_truncated": 100,
            "padded": 400,
            "non_padded": 0,
            "effective_few_shots": 5.0,
            "num_truncated_few_shots": 0
        },
        "leaderboard|mmlu:high_school_biology|5": {
            "hashes": {
                "hash_examples": "c9136373af2180de",
"hash_full_prompts": "062791ed55812179", | |
"hash_input_tokens": "e68141c3bcd3eb4f", | |
"hash_cont_tokens": "e34d57f7d3c4ca16" | |
}, | |
"truncated": 0, | |
"non_truncated": 310, | |
"padded": 1240, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_chemistry|5": { | |
"hashes": { | |
"hash_examples": "b0661bfa1add6404", | |
"hash_full_prompts": "f856a13619fab2ef", | |
"hash_input_tokens": "0ffd067f35ae7ead", | |
"hash_cont_tokens": "e8482d44df4b3740" | |
}, | |
"truncated": 0, | |
"non_truncated": 203, | |
"padded": 812, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_computer_science|5": { | |
"hashes": { | |
"hash_examples": "80fc1d623a3d665f", | |
"hash_full_prompts": "f32bb0406a0811c5", | |
"hash_input_tokens": "cac62c25d2eedd8a", | |
"hash_cont_tokens": "17b868b63507f9a3" | |
}, | |
"truncated": 0, | |
"non_truncated": 100, | |
"padded": 400, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_european_history|5": { | |
"hashes": { | |
"hash_examples": "854da6e5af0fe1a1", | |
"hash_full_prompts": "60bd22b3c349824f", | |
"hash_input_tokens": "a4ae54f6efe21f68", | |
"hash_cont_tokens": "d63e679a49418339" | |
}, | |
"truncated": 0, | |
"non_truncated": 165, | |
"padded": 656, | |
"non_padded": 4, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_geography|5": { | |
"hashes": { | |
"hash_examples": "7dc963c7acd19ad8", | |
"hash_full_prompts": "7bd24dc4f97e5037", | |
"hash_input_tokens": "a9fd51319c2f613d", | |
"hash_cont_tokens": "d78483e286d06f1a" | |
}, | |
"truncated": 0, | |
"non_truncated": 198, | |
"padded": 792, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_government_and_politics|5": { | |
"hashes": { | |
"hash_examples": "1f675dcdebc9758f", | |
"hash_full_prompts": "6191145931a3f07b", | |
"hash_input_tokens": "b4a890ab45eeb46c", | |
"hash_cont_tokens": "691cdff71ff5fe57" | |
}, | |
"truncated": 0, | |
"non_truncated": 193, | |
"padded": 772, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_macroeconomics|5": { | |
"hashes": { | |
"hash_examples": "2fb32cf2d80f0b35", | |
"hash_full_prompts": "a361964c32e81cb7", | |
"hash_input_tokens": "6d393396cbebdef4", | |
"hash_cont_tokens": "d5ad4c5bdca967ad" | |
}, | |
"truncated": 0, | |
"non_truncated": 390, | |
"padded": 1560, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_mathematics|5": { | |
"hashes": { | |
"hash_examples": "fd6646fdb5d58a1f", | |
"hash_full_prompts": "84d60c6b3dc583c3", | |
"hash_input_tokens": "94c790a33419b6d3", | |
"hash_cont_tokens": "8f631ca5687dd0d4" | |
}, | |
"truncated": 0, | |
"non_truncated": 270, | |
"padded": 1080, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_microeconomics|5": { | |
"hashes": { | |
"hash_examples": "2118f21f71d87d84", | |
"hash_full_prompts": "1dc9d09e21f9dcaf", | |
"hash_input_tokens": "9f5af0e55d0b8d98", | |
"hash_cont_tokens": "7321048a28451473" | |
}, | |
"truncated": 0, | |
"non_truncated": 238, | |
"padded": 952, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_physics|5": { | |
"hashes": { | |
"hash_examples": "dc3ce06378548565", | |
"hash_full_prompts": "30a871dda941dce7", | |
"hash_input_tokens": "ab83f74068c3d258", | |
"hash_cont_tokens": "bb137581f269861c" | |
}, | |
"truncated": 0, | |
"non_truncated": 151, | |
"padded": 604, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_psychology|5": { | |
"hashes": { | |
"hash_examples": "c8d1d98a40e11f2f", | |
"hash_full_prompts": "0983abb9633b1b6b", | |
"hash_input_tokens": "0f2a8a60a060256b", | |
"hash_cont_tokens": "b455cab2675bd863" | |
}, | |
"truncated": 0, | |
"non_truncated": 545, | |
"padded": 2180, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_statistics|5": { | |
"hashes": { | |
"hash_examples": "666c8759b98ee4ff", | |
"hash_full_prompts": "ea4d992455c02e2e", | |
"hash_input_tokens": "ea0fb0a33c41cb44", | |
"hash_cont_tokens": "1b3196fec7e58037" | |
}, | |
"truncated": 0, | |
"non_truncated": 216, | |
"padded": 864, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_us_history|5": { | |
"hashes": { | |
"hash_examples": "95fef1c4b7d3f81e", | |
"hash_full_prompts": "49115a06feb5158d", | |
"hash_input_tokens": "4c3a2a3f4609cf93", | |
"hash_cont_tokens": "a331dedc2aa01b3e" | |
}, | |
"truncated": 0, | |
"non_truncated": 204, | |
"padded": 816, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:high_school_world_history|5": { | |
"hashes": { | |
"hash_examples": "7e5085b6184b0322", | |
"hash_full_prompts": "18924265044ab474", | |
"hash_input_tokens": "03139383d6a9a663", | |
"hash_cont_tokens": "d0fbe030b8c8c2bf" | |
}, | |
"truncated": 0, | |
"non_truncated": 237, | |
"padded": 948, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:human_aging|5": { | |
"hashes": { | |
"hash_examples": "c17333e7c7c10797", | |
"hash_full_prompts": "74ba5b78031d35d6", | |
"hash_input_tokens": "742a9464c1ae6345", | |
"hash_cont_tokens": "1dd29c3755494850" | |
}, | |
"truncated": 0, | |
"non_truncated": 223, | |
"padded": 892, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:human_sexuality|5": { | |
"hashes": { | |
"hash_examples": "4edd1e9045df5e3d", | |
"hash_full_prompts": "d557d54518d646b9", | |
"hash_input_tokens": "f036df949998edfd", | |
"hash_cont_tokens": "c85573f663c10691" | |
}, | |
"truncated": 0, | |
"non_truncated": 131, | |
"padded": 524, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:international_law|5": { | |
"hashes": { | |
"hash_examples": "db2fa00d771a062a", | |
"hash_full_prompts": "7a57f2d61a2ad864", | |
"hash_input_tokens": "864645470a52a382", | |
"hash_cont_tokens": "d263804ba918154f" | |
}, | |
"truncated": 0, | |
"non_truncated": 121, | |
"padded": 476, | |
"non_padded": 8, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:jurisprudence|5": { | |
"hashes": { | |
"hash_examples": "e956f86b124076fe", | |
"hash_full_prompts": "660a21a643b2b148", | |
"hash_input_tokens": "b4ce535448aacd40", | |
"hash_cont_tokens": "581986691a84ece8" | |
}, | |
"truncated": 0, | |
"non_truncated": 108, | |
"padded": 425, | |
"non_padded": 7, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:logical_fallacies|5": { | |
"hashes": { | |
"hash_examples": "956e0e6365ab79f1", | |
"hash_full_prompts": "4339b977916ccc25", | |
"hash_input_tokens": "077bb9827109b262", | |
"hash_cont_tokens": "55a858b28bbda458" | |
}, | |
"truncated": 0, | |
"non_truncated": 163, | |
"padded": 632, | |
"non_padded": 20, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:machine_learning|5": { | |
"hashes": { | |
"hash_examples": "397997cc6f4d581e", | |
"hash_full_prompts": "20ce8dbbd8b49a27", | |
"hash_input_tokens": "1045f472b0413443", | |
"hash_cont_tokens": "e99d3d3efd4ac7a3" | |
}, | |
"truncated": 0, | |
"non_truncated": 112, | |
"padded": 444, | |
"non_padded": 4, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:management|5": { | |
"hashes": { | |
"hash_examples": "2bcbe6f6ca63d740", | |
"hash_full_prompts": "cb8bb17b91e33244", | |
"hash_input_tokens": "de25bec16502f3e9", | |
"hash_cont_tokens": "13d9dc56bca34726" | |
}, | |
"truncated": 0, | |
"non_truncated": 103, | |
"padded": 412, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:marketing|5": { | |
"hashes": { | |
"hash_examples": "8ddb20d964a1b065", | |
"hash_full_prompts": "453573e88ac246f9", | |
"hash_input_tokens": "4959bf6c02de27d0", | |
"hash_cont_tokens": "2700ea26933916a2" | |
}, | |
"truncated": 0, | |
"non_truncated": 234, | |
"padded": 912, | |
"non_padded": 24, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:medical_genetics|5": { | |
"hashes": { | |
"hash_examples": "182a71f4763d2cea", | |
"hash_full_prompts": "00fd3fa001f4c5ee", | |
"hash_input_tokens": "a7e7ea3bcfa7fa27", | |
"hash_cont_tokens": "17b868b63507f9a3" | |
}, | |
"truncated": 0, | |
"non_truncated": 100, | |
"padded": 400, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:miscellaneous|5": { | |
"hashes": { | |
"hash_examples": "4c404fdbb4ca57fc", | |
"hash_full_prompts": "c769390c12be534e", | |
"hash_input_tokens": "dbdbdb65488c6714", | |
"hash_cont_tokens": "7bf4341c79587250" | |
}, | |
"truncated": 0, | |
"non_truncated": 783, | |
"padded": 3132, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:moral_disputes|5": { | |
"hashes": { | |
"hash_examples": "60cbd2baa3fea5c9", | |
"hash_full_prompts": "60f275023cddc75a", | |
"hash_input_tokens": "0e4cdc61726030c9", | |
"hash_cont_tokens": "38a48e9de6976f00" | |
}, | |
"truncated": 0, | |
"non_truncated": 346, | |
"padded": 1384, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:moral_scenarios|5": { | |
"hashes": { | |
"hash_examples": "fd8b0431fbdd75ef", | |
"hash_full_prompts": "96fe45f488526432", | |
"hash_input_tokens": "5937f8d106b12b1b", | |
"hash_cont_tokens": "761c4dc187689d89" | |
}, | |
"truncated": 0, | |
"non_truncated": 895, | |
"padded": 3528, | |
"non_padded": 52, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:nutrition|5": { | |
"hashes": { | |
"hash_examples": "71e55e2b829b6528", | |
"hash_full_prompts": "391da317a6ebabc9", | |
"hash_input_tokens": "73f28055af1a06b1", | |
"hash_cont_tokens": "65005bd7d6f6012a" | |
}, | |
"truncated": 0, | |
"non_truncated": 306, | |
"padded": 1220, | |
"non_padded": 4, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:philosophy|5": { | |
"hashes": { | |
"hash_examples": "a6d489a8d208fa4b", | |
"hash_full_prompts": "0f43b07df0c2564c", | |
"hash_input_tokens": "6854552448d4c08d", | |
"hash_cont_tokens": "0b47934fb6314dec" | |
}, | |
"truncated": 0, | |
"non_truncated": 311, | |
"padded": 1244, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:prehistory|5": { | |
"hashes": { | |
"hash_examples": "6cc50f032a19acaa", | |
"hash_full_prompts": "daa46dc163e9b69a", | |
"hash_input_tokens": "b73a40ea5285a351", | |
"hash_cont_tokens": "3f20acd855ee0a29" | |
}, | |
"truncated": 0, | |
"non_truncated": 324, | |
"padded": 1294, | |
"non_padded": 2, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:professional_accounting|5": { | |
"hashes": { | |
"hash_examples": "50f57ab32f5f6cea", | |
"hash_full_prompts": "4757f9bae82a5c16", | |
"hash_input_tokens": "fda5460b5ad0c7df", | |
"hash_cont_tokens": "8f122ba881355d4b" | |
}, | |
"truncated": 0, | |
"non_truncated": 282, | |
"padded": 1112, | |
"non_padded": 16, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:professional_law|5": { | |
"hashes": { | |
"hash_examples": "a8fdc85c64f4b215", | |
"hash_full_prompts": "3f66021f850be52a", | |
"hash_input_tokens": "8dc2c724d6b52c9e", | |
"hash_cont_tokens": "90d5df417c4d3fd3" | |
}, | |
"truncated": 0, | |
"non_truncated": 1534, | |
"padded": 6136, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:professional_medicine|5": { | |
"hashes": { | |
"hash_examples": "c373a28a3050a73a", | |
"hash_full_prompts": "b3aad1a4213b21e1", | |
"hash_input_tokens": "26450594e11b4cc6", | |
"hash_cont_tokens": "4a2d2988884f7f70" | |
}, | |
"truncated": 0, | |
"non_truncated": 272, | |
"padded": 1088, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:professional_psychology|5": { | |
"hashes": { | |
"hash_examples": "bf5254fe818356af", | |
"hash_full_prompts": "163da0126bc9800f", | |
"hash_input_tokens": "7b5d7462d97de493", | |
"hash_cont_tokens": "e0a952cb8a9c81de" | |
}, | |
"truncated": 0, | |
"non_truncated": 612, | |
"padded": 2448, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:public_relations|5": { | |
"hashes": { | |
"hash_examples": "b66d52e28e7d14e0", | |
"hash_full_prompts": "91c1610c8bf19b8b", | |
"hash_input_tokens": "3cd1c595e7886cc6", | |
"hash_cont_tokens": "1fa77a8dff3922b8" | |
}, | |
"truncated": 0, | |
"non_truncated": 110, | |
"padded": 432, | |
"non_padded": 8, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:security_studies|5": { | |
"hashes": { | |
"hash_examples": "514c14feaf000ad9", | |
"hash_full_prompts": "49a870ff40d6a48a", | |
"hash_input_tokens": "198ed48bb49fc97b", | |
"hash_cont_tokens": "81fc9cb3cbdd52db" | |
}, | |
"truncated": 0, | |
"non_truncated": 245, | |
"padded": 980, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:sociology|5": { | |
"hashes": { | |
"hash_examples": "f6c9bc9d18c80870", | |
"hash_full_prompts": "7357a98879032a39", | |
"hash_input_tokens": "fbd901faea09bbae", | |
"hash_cont_tokens": "2a0493252ed2cf43" | |
}, | |
"truncated": 0, | |
"non_truncated": 201, | |
"padded": 800, | |
"non_padded": 4, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:us_foreign_policy|5": { | |
"hashes": { | |
"hash_examples": "ed7b78629db6678f", | |
"hash_full_prompts": "c8396caa957ecf85", | |
"hash_input_tokens": "088fc097a2f03146", | |
"hash_cont_tokens": "17b868b63507f9a3" | |
}, | |
"truncated": 0, | |
"non_truncated": 100, | |
"padded": 396, | |
"non_padded": 4, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:virology|5": { | |
"hashes": { | |
"hash_examples": "bc52ffdc3f9b994a", | |
"hash_full_prompts": "145afa2914198bbc", | |
"hash_input_tokens": "4536fddcbb88e4c5", | |
"hash_cont_tokens": "5ab892d003b00c98" | |
}, | |
"truncated": 0, | |
"non_truncated": 166, | |
"padded": 664, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
}, | |
"leaderboard|mmlu:world_religions|5": { | |
"hashes": { | |
"hash_examples": "ecdb4a4f94f62930", | |
"hash_full_prompts": "5e3f2463d253d254", | |
"hash_input_tokens": "1811e38297227fbb", | |
"hash_cont_tokens": "15a5e5dbdfbb8568" | |
}, | |
"truncated": 0, | |
"non_truncated": 171, | |
"padded": 684, | |
"non_padded": 0, | |
"effective_few_shots": 5.0, | |
"num_truncated_few_shots": 0 | |
} | |
}, | |
"summary_general": { | |
"hashes": { | |
"hash_examples": "341a076d0beb7048", | |
"hash_full_prompts": "269358980048fbaa", | |
"hash_input_tokens": "22fa045638db0688", | |
"hash_cont_tokens": "c24d2aedc1261d61" | |
}, | |
"truncated": 0, | |
"non_truncated": 14042, | |
"padded": 56006, | |
"non_padded": 162, | |
"num_truncated_few_shots": 0 | |
} | |
} |