{
  "config_general": {
    "lighteval_sha": "?",
    "num_fewshot_seeds": 1,
    "override_batch_size": 1,
    "max_samples": null,
    "job_id": "",
    "start_time": 312881.658431109,
    "end_time": 315311.625384565,
    "total_evaluation_time_secondes": "2429.9669534559944",
    "model_name": "edbeeching/mixtral-8x7b-instruct-v0.1_merged",
    "model_sha": "6da964ddd4ac2f1edfa95d2a72082d87f1007cbf",
    "model_dtype": "torch.bfloat16",
    "model_size": "87.49 GB",
    "config": null
  },
  "results": {
    "lighteval|bigbench:causal_judgment|0": {
      "acc": 0.5210526315789473,
      "acc_stderr": 0.03633739504773335
    },
    "lighteval|bigbench:date_understanding|0": {
      "acc": 0.07588075880758807,
      "acc_stderr": 0.013804031119092806
    },
    "lighteval|bigbench:disambiguation_qa|0": {
      "acc": 0.4108527131782946,
      "acc_stderr": 0.030689404327011723
    },
    "lighteval|bigbench:geometric_shapes|0": {
      "acc": 0.11666666666666667,
      "acc_stderr": 0.016942928579367775
    },
    "lighteval|bigbench:logical_deduction_five_objects|0": {
      "acc": 0.2,
      "acc_stderr": 0.01790645924143384
    },
    "lighteval|bigbench:logical_deduction_seven_objects|0": {
      "acc": 0.14285714285714285,
      "acc_stderr": 0.013235458703202278
    },
    "lighteval|bigbench:logical_deduction_three_objects|0": {
      "acc": 0.3333333333333333,
      "acc_stderr": 0.027262027336984396
    },
    "lighteval|bigbench:movie_recommendation|0": {
      "acc": 0.248,
      "acc_stderr": 0.019332342821239107
    },
    "lighteval|bigbench:navigate|0": {
      "acc": 0.503,
      "acc_stderr": 0.015819015179246724
    },
    "lighteval|bigbench:reasoning_about_colored_objects|0": {
      "acc": 0.217,
      "acc_stderr": 0.009219435937165706
    },
    "lighteval|bigbench:ruin_names|0": {
      "acc": 0.16964285714285715,
      "acc_stderr": 0.01775196735250109
    },
    "lighteval|bigbench:salient_translation_error_detection|0": {
      "acc": 0.14829659318637275,
      "acc_stderr": 0.011255432818843761
    },
    "lighteval|bigbench:snarks|0": {
      "acc": 0.48066298342541436,
      "acc_stderr": 0.037239918828977814
    },
    "lighteval|bigbench:sports_understanding|0": {
      "acc": 0.49,
      "acc_stderr": 0.015816135752773203
    },
    "lighteval|bigbench:temporal_sequences|0": {
      "acc": 1.0,
      "acc_stderr": 0.0
    },
    "lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
      "acc": 0.1992,
      "acc_stderr": 0.011301223921757484
    },
    "lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
      "acc": 0.14057142857142857,
      "acc_stderr": 0.008311100199727682
    },
    "lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
      "acc": 0.3333333333333333,
      "acc_stderr": 0.027262027336984396
    },
    "lighteval|bigbench:_average|0": {
      "acc": 0.3183528023378544,
      "acc_stderr": 0.01830479469466906
    },
    "all": {
      "acc": 0.3183528023378544,
      "acc_stderr": 0.01830479469466906
    }
  },
  "versions": {
    "lighteval|bigbench:causal_judgment|0": 0,
    "lighteval|bigbench:date_understanding|0": 0,
    "lighteval|bigbench:disambiguation_qa|0": 0,
    "lighteval|bigbench:geometric_shapes|0": 0,
    "lighteval|bigbench:logical_deduction_five_objects|0": 0,
    "lighteval|bigbench:logical_deduction_seven_objects|0": 0,
    "lighteval|bigbench:logical_deduction_three_objects|0": 0,
    "lighteval|bigbench:movie_recommendation|0": 0,
    "lighteval|bigbench:navigate|0": 0,
    "lighteval|bigbench:reasoning_about_colored_objects|0": 0,
    "lighteval|bigbench:ruin_names|0": 0,
    "lighteval|bigbench:salient_translation_error_detection|0": 0,
    "lighteval|bigbench:snarks|0": 0,
    "lighteval|bigbench:sports_understanding|0": 0,
    "lighteval|bigbench:temporal_sequences|0": 0,
    "lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0,
    "lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0,
    "lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0
  },
  "config_tasks": {
    "lighteval|bigbench:causal_judgment": {
      "name": "bigbench:causal_judgment",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "causal_judgement",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 190,
      "effective_num_docs": 190,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:date_understanding": {
      "name": "bigbench:date_understanding",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "date_understanding",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 369,
      "effective_num_docs": 369,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:disambiguation_qa": {
      "name": "bigbench:disambiguation_qa",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "disambiguation_qa",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 258,
      "effective_num_docs": 258,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:geometric_shapes": {
      "name": "bigbench:geometric_shapes",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "geometric_shapes",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 360,
      "effective_num_docs": 360,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:logical_deduction_five_objects": {
      "name": "bigbench:logical_deduction_five_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "logical_deduction_five_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 500,
      "effective_num_docs": 500,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:logical_deduction_seven_objects": {
      "name": "bigbench:logical_deduction_seven_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "logical_deduction_seven_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 700,
      "effective_num_docs": 700,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:logical_deduction_three_objects": {
      "name": "bigbench:logical_deduction_three_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "logical_deduction_three_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 300,
      "effective_num_docs": 300,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:movie_recommendation": {
      "name": "bigbench:movie_recommendation",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "movie_recommendation",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 500,
      "effective_num_docs": 500,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:navigate": {
      "name": "bigbench:navigate",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "navigate",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 1000,
      "effective_num_docs": 1000,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:reasoning_about_colored_objects": {
      "name": "bigbench:reasoning_about_colored_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "reasoning_about_colored_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 2000,
      "effective_num_docs": 2000,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:ruin_names": {
      "name": "bigbench:ruin_names",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "ruin_names",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 448,
      "effective_num_docs": 448,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:salient_translation_error_detection": {
      "name": "bigbench:salient_translation_error_detection",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "salient_translation_error_detection",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 998,
      "effective_num_docs": 998,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:snarks": {
      "name": "bigbench:snarks",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "snarks",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 181,
      "effective_num_docs": 181,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:sports_understanding": {
      "name": "bigbench:sports_understanding",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "sports_understanding",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 1000,
      "effective_num_docs": 1000,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:temporal_sequences": {
      "name": "bigbench:temporal_sequences",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "temporal_sequences",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 1000,
      "effective_num_docs": 1000,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:tracking_shuffled_objects_five_objects": {
      "name": "bigbench:tracking_shuffled_objects_five_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "tracking_shuffled_objects_five_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 1250,
      "effective_num_docs": 1250,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:tracking_shuffled_objects_seven_objects": {
      "name": "bigbench:tracking_shuffled_objects_seven_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "tracking_shuffled_objects_seven_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 1750,
      "effective_num_docs": 1750,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    },
    "lighteval|bigbench:tracking_shuffled_objects_three_objects": {
      "name": "bigbench:tracking_shuffled_objects_three_objects",
      "prompt_function": "bbh_lighteval",
      "hf_repo": "lighteval/bbh",
      "hf_subset": "tracking_shuffled_objects_three_objects",
      "metric": [
        "loglikelihood_acc_single_token"
      ],
      "hf_avail_splits": [
        "train"
      ],
      "evaluation_splits": [
        "train"
      ],
      "few_shots_split": null,
      "few_shots_select": null,
      "generation_size": -1,
      "stop_sequence": [
        "</s>",
        "Q:",
        "\n\n"
      ],
      "output_regex": null,
      "frozen": false,
      "suite": [
        "lighteval"
      ],
      "original_num_docs": 300,
      "effective_num_docs": 300,
      "trust_dataset": true,
      "must_remove_duplicate_docs": null
    }
  },
  "summary_tasks": {
    "lighteval|bigbench:causal_judgment|0": {
      "hashes": {
        "hash_examples": "dfb1ae47218f2850",
        "hash_full_prompts": "806f52466e80cf69",
        "hash_input_tokens": "f1c384c14656088d",
        "hash_cont_tokens": "9a9bd8b966072e64"
      },
      "truncated": 0,
      "non_truncated": 190,
      "padded": 189,
      "non_padded": 1,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:date_understanding|0": {
      "hashes": {
        "hash_examples": "2b823c41500a6ec2",
        "hash_full_prompts": "1fd1025553a3148d",
        "hash_input_tokens": "1081ece460afad8c",
        "hash_cont_tokens": "ca1827d9c35df800"
      },
      "truncated": 0,
      "non_truncated": 369,
      "padded": 369,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:disambiguation_qa|0": {
      "hashes": {
        "hash_examples": "2a4c3d41db198cea",
        "hash_full_prompts": "466dedb5ce49a0f9",
        "hash_input_tokens": "864f38691cc7d065",
        "hash_cont_tokens": "c3e79f545508f21a"
      },
      "truncated": 0,
      "non_truncated": 258,
      "padded": 258,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:geometric_shapes|0": {
      "hashes": {
        "hash_examples": "24aa261103911b72",
        "hash_full_prompts": "d19b31229c611461",
        "hash_input_tokens": "295e0e7a241dbb0d",
        "hash_cont_tokens": "7351a9b4012a13a4"
      },
      "truncated": 0,
      "non_truncated": 360,
      "padded": 360,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:logical_deduction_five_objects|0": {
      "hashes": {
        "hash_examples": "cb5bdc92afc41f83",
        "hash_full_prompts": "bde986b615b9cee8",
        "hash_input_tokens": "424a73890d8e3a11",
        "hash_cont_tokens": "3791ab917f69e6af"
      },
      "truncated": 0,
      "non_truncated": 500,
      "padded": 500,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:logical_deduction_seven_objects|0": {
      "hashes": {
        "hash_examples": "b6805ea696739f9f",
        "hash_full_prompts": "7d07d652e49bb92a",
        "hash_input_tokens": "1634404a63e8bad6",
        "hash_cont_tokens": "9c00f5e49f3e5f98"
      },
      "truncated": 0,
      "non_truncated": 700,
      "padded": 687,
      "non_padded": 13,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:logical_deduction_three_objects|0": {
      "hashes": {
        "hash_examples": "0509e5712ab9bcdb",
        "hash_full_prompts": "fec43d39bca6d5c8",
        "hash_input_tokens": "5481c1a53629152a",
        "hash_cont_tokens": "02df3d11c26ce1da"
      },
      "truncated": 0,
      "non_truncated": 300,
      "padded": 297,
      "non_padded": 3,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:movie_recommendation|0": {
      "hashes": {
        "hash_examples": "530cc6f737830f45",
        "hash_full_prompts": "b6c2a8026905e612",
        "hash_input_tokens": "700788853cfedc2d",
        "hash_cont_tokens": "be520838bf2427bc"
      },
      "truncated": 0,
      "non_truncated": 500,
      "padded": 494,
      "non_padded": 6,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:navigate|0": {
      "hashes": {
        "hash_examples": "7962ef85d0058b9a",
        "hash_full_prompts": "aad9a2b00632d2cf",
        "hash_input_tokens": "495eedbb1880cd1c",
        "hash_cont_tokens": "ce2f051624b0c7f2"
      },
      "truncated": 0,
      "non_truncated": 1000,
      "padded": 981,
      "non_padded": 19,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:reasoning_about_colored_objects|0": {
      "hashes": {
        "hash_examples": "39be1ab1677a651d",
        "hash_full_prompts": "c54dfef6c016e6a9",
        "hash_input_tokens": "826edc0923863060",
        "hash_cont_tokens": "87abd4073f64b19a"
      },
      "truncated": 0,
      "non_truncated": 2000,
      "padded": 1991,
      "non_padded": 9,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:ruin_names|0": {
      "hashes": {
        "hash_examples": "e9b96b31d2154941",
        "hash_full_prompts": "c8b837710c20b1eb",
        "hash_input_tokens": "51cf77669cdfc877",
        "hash_cont_tokens": "046bbbbddb05b429"
      },
      "truncated": 0,
      "non_truncated": 448,
      "padded": 447,
      "non_padded": 1,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:salient_translation_error_detection|0": {
      "hashes": {
        "hash_examples": "951ac59f7ad0427d",
        "hash_full_prompts": "e917bc26b7a84dc2",
        "hash_input_tokens": "a80cd58136927d10",
        "hash_cont_tokens": "12a73509c698a7dc"
      },
      "truncated": 0,
      "non_truncated": 998,
      "padded": 998,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:snarks|0": {
      "hashes": {
        "hash_examples": "3a53eb9b9d758534",
        "hash_full_prompts": "82d0ce6eda4a87e8",
        "hash_input_tokens": "df117a51d9f992fb",
        "hash_cont_tokens": "11293a1bfa806eba"
      },
      "truncated": 0,
      "non_truncated": 181,
      "padded": 173,
      "non_padded": 8,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:sports_understanding|0": {
      "hashes": {
        "hash_examples": "bd65741f00770373",
        "hash_full_prompts": "c3d2303da3ad2d87",
        "hash_input_tokens": "6cc9d46780536559",
        "hash_cont_tokens": "a934157fb362ce28"
      },
      "truncated": 0,
      "non_truncated": 1000,
      "padded": 1000,
      "non_padded": 0,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:temporal_sequences|0": {
      "hashes": {
        "hash_examples": "1d13139f47cb2df7",
        "hash_full_prompts": "fd9b6954c76cdd8f",
        "hash_input_tokens": "dd061329041e70c4",
        "hash_cont_tokens": "326f6f7b7bdf4692"
      },
      "truncated": 0,
      "non_truncated": 1000,
      "padded": 988,
      "non_padded": 12,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
      "hashes": {
        "hash_examples": "8770a702a9646648",
        "hash_full_prompts": "74fbca2d47f8529e",
        "hash_input_tokens": "081eb0aa200abfde",
        "hash_cont_tokens": "30ca4364426258e1"
      },
      "truncated": 0,
      "non_truncated": 1250,
      "padded": 1240,
      "non_padded": 10,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
      "hashes": {
        "hash_examples": "b469b7d073824a59",
        "hash_full_prompts": "bb13b4b0abdc1636",
        "hash_input_tokens": "c85929ae666feadd",
        "hash_cont_tokens": "f76ba63a583d749e"
      },
      "truncated": 0,
      "non_truncated": 1750,
      "padded": 1675,
      "non_padded": 75,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    },
    "lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
      "hashes": {
        "hash_examples": "0509e5712ab9bcdb",
        "hash_full_prompts": "fec43d39bca6d5c8",
        "hash_input_tokens": "315a1e9011792d46",
        "hash_cont_tokens": "8aea5a43cb736677"
      },
      "truncated": 0,
      "non_truncated": 300,
      "padded": 294,
      "non_padded": 6,
      "effective_few_shots": 0.0,
      "num_truncated_few_shots": 0
    }
  },
  "summary_general": {
    "hashes": {
      "hash_examples": "51a30c4501ba4586",
      "hash_full_prompts": "70511cde3f483db2",
      "hash_input_tokens": "1de3c6c854da57bc",
      "hash_cont_tokens": "b4e1747eea39656b"
    },
    "truncated": 0,
    "non_truncated": 13104,
    "padded": 12941,
    "non_padded": 163,
    "num_truncated_few_shots": 0
  }
} |
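
Sanity check (not part of the lighteval output above): the "lighteval|bigbench:_average|0" and "all" entries should equal the unweighted mean of the 18 per-task accuracies in "results". A minimal Python sketch, assuming the file has been downloaded locally under its upload name:

import json

# Load the results file (local filename assumed to match this upload).
with open("results_2024-04-14T12-32-13.013639.json") as f:
    data = json.load(f)

# Per-task accuracies, excluding the aggregate "all" and ":_average" entries.
task_accs = [
    scores["acc"]
    for name, scores in data["results"].items()
    if name != "all" and ":_average|" not in name
]

# The reported average is the unweighted mean over the 18 subtasks.
macro_avg = sum(task_accs) / len(task_accs)
print(f"{len(task_accs)} tasks, macro-average acc = {macro_avg:.16f}")
print("matches reported 'all':", abs(macro_avg - data["results"]["all"]["acc"]) < 1e-9)

The same pattern can be applied to "summary_tasks" (e.g., summing "non_truncated" over all tasks should give the 13104 reported in "summary_general").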