|
# Repo id of the aggregated leaderboard results dataset.
# NOTE(review): the "datasets/" prefix suggests a Hugging Face Hub path of the
# form "datasets/<org>/<name>" — confirm against the consumer's API calls.
RESULTS_DATASET_ID = "datasets/open-llm-leaderboard/results"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Template for the per-model details dataset id; "{model_name_sanitized}" is a
# str.format-style placeholder filled in by the consumer with a sanitized
# model name — TODO confirm the exact sanitization at the call site.
DETAILS_DATASET_ID = "datasets/open-llm-leaderboard/{model_name_sanitized}-details"

# Filename pattern for per-sample result files inside a details dataset:
# "{subtask}" is format-filled with a SUBTASKS entry; "*" is a wildcard
# (presumably a timestamp suffix — verify against the files on the Hub).
DETAILS_FILENAME = "samples_{subtask}_*.json"
|
# Top-level leaderboard tasks, keyed by harness task name, mapping to a pair
# of (human-readable label, SUBTASKS lookup key).
# NOTE(review): the second tuple element matches the keys of SUBTASKS below —
# hence "leaderboard_math_hard" maps to "leaderboard_math", not to itself.
TASKS = {
    "leaderboard_arc_challenge": ("ARC", "leaderboard_arc_challenge"),
    "leaderboard_bbh": ("BBH", "leaderboard_bbh"),
    "leaderboard_gpqa": ("GPQA", "leaderboard_gpqa"),
    "leaderboard_ifeval": ("IFEval", "leaderboard_ifeval"),
    "leaderboard_math_hard": ("MATH", "leaderboard_math"),
    "leaderboard_mmlu_pro": ("MMLU-Pro", "leaderboard_mmlu_pro"),
    "leaderboard_musr": ("MuSR", "leaderboard_musr"),
}
|
# Expansion of each task (keyed by the second element of the TASKS tuples)
# into its concrete subtask names, as used to fill DETAILS_FILENAME.
# Single-subtask tasks map to a one-element list for uniform handling.
SUBTASKS = {
    "leaderboard_arc_challenge": ["leaderboard_arc_challenge"],
    # Big-Bench Hard: 24 individual subtasks.
    "leaderboard_bbh": [
        "leaderboard_bbh_boolean_expressions",
        "leaderboard_bbh_causal_judgement",
        "leaderboard_bbh_date_understanding",
        "leaderboard_bbh_disambiguation_qa",
        "leaderboard_bbh_formal_fallacies",
        "leaderboard_bbh_geometric_shapes",
        "leaderboard_bbh_hyperbaton",
        "leaderboard_bbh_logical_deduction_five_objects",
        "leaderboard_bbh_logical_deduction_seven_objects",
        "leaderboard_bbh_logical_deduction_three_objects",
        "leaderboard_bbh_movie_recommendation",
        "leaderboard_bbh_navigate",
        "leaderboard_bbh_object_counting",
        "leaderboard_bbh_penguins_in_a_table",
        "leaderboard_bbh_reasoning_about_colored_objects",
        "leaderboard_bbh_ruin_names",
        "leaderboard_bbh_salient_translation_error_detection",
        "leaderboard_bbh_snarks",
        "leaderboard_bbh_sports_understanding",
        "leaderboard_bbh_temporal_sequences",
        "leaderboard_bbh_tracking_shuffled_objects_five_objects",
        "leaderboard_bbh_tracking_shuffled_objects_seven_objects",
        "leaderboard_bbh_tracking_shuffled_objects_three_objects",
        "leaderboard_bbh_web_of_lies",
    ],
    # GPQA: three difficulty splits.
    "leaderboard_gpqa": [
        "leaderboard_gpqa_extended",
        "leaderboard_gpqa_diamond",
        "leaderboard_gpqa_main",
    ],
    "leaderboard_ifeval": ["leaderboard_ifeval"],
    # MATH (hard): seven topic splits.
    "leaderboard_math": [
        "leaderboard_math_algebra_hard",
        "leaderboard_math_counting_and_prob_hard",
        "leaderboard_math_geometry_hard",
        "leaderboard_math_intermediate_algebra_hard",
        "leaderboard_math_num_theory_hard",
        "leaderboard_math_prealgebra_hard",
        "leaderboard_math_precalculus_hard",
    ],
    "leaderboard_mmlu_pro": ["leaderboard_mmlu_pro"],
    # MuSR: three scenario types.
    "leaderboard_musr": [
        "leaderboard_musr_murder_mysteries",
        "leaderboard_musr_object_placements",
        "leaderboard_musr_team_allocation",
    ],
}
|
|