lewtun's picture
lewtun HF staff
Upload eval_results/HuggingFaceH4/zephyr-7b-beta/main/bbh/results_2024-03-28T16-39-32.239458.json with huggingface_hub
89c611e verified
raw
history blame
27 kB
{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 8646228.50442761,
"end_time": 8646420.550566657,
"total_evaluation_time_secondes": "192.0461390465498",
"model_name": "HuggingFaceH4/zephyr-7b-beta",
"model_sha": "b70e0c9a2d9e14bd1e812d3c398e5f313e93b473",
"model_dtype": "torch.bfloat16",
"model_size": "13.99 GB",
"config": null
},
"results": {
"lighteval|bigbench:causal_judgment|0": {
"acc": 0.5105263157894737,
"acc_stderr": 0.03636158772354769
},
"lighteval|bigbench:date_understanding|0": {
"acc": 0.3224932249322493,
"acc_stderr": 0.024366503246229146
},
"lighteval|bigbench:disambiguation_qa|0": {
"acc": 0.5697674418604651,
"acc_stderr": 0.030884024559333312
},
"lighteval|bigbench:geometric_shapes|0": {
"acc": 0.058333333333333334,
"acc_stderr": 0.012369717218047992
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"acc": 0.28,
"acc_stderr": 0.020099950647503233
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"acc": 0.18285714285714286,
"acc_stderr": 0.014620639582701147
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"acc": 0.43,
"acc_stderr": 0.02863096997084747
},
"lighteval|bigbench:movie_recommendation|0": {
"acc": 0.45,
"acc_stderr": 0.022270877485360437
},
"lighteval|bigbench:navigate|0": {
"acc": 0.483,
"acc_stderr": 0.01581015372983343
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"acc": 0.355,
"acc_stderr": 0.010702559151258895
},
"lighteval|bigbench:ruin_names|0": {
"acc": 0.40848214285714285,
"acc_stderr": 0.023249665356807413
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"acc": 0.313627254509018,
"acc_stderr": 0.014693976267634754
},
"lighteval|bigbench:snarks|0": {
"acc": 0.585635359116022,
"acc_stderr": 0.03671713082389855
},
"lighteval|bigbench:sports_understanding|0": {
"acc": 0.691,
"acc_stderr": 0.014619600977206488
},
"lighteval|bigbench:temporal_sequences|0": {
"acc": 0.658,
"acc_stderr": 0.015008706182121731
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"acc": 0.1472,
"acc_stderr": 0.010025274704781165
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"acc": 0.09542857142857143,
"acc_stderr": 0.007025313992794412
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"acc": 0.43,
"acc_stderr": 0.02863096997084747
},
"lighteval|bigbench:_average|0": {
"acc": 0.38729726592685654,
"acc_stderr": 0.020338201199486376
},
"all": {
"acc": 0.38729726592685654,
"acc_stderr": 0.020338201199486376
}
},
"versions": {
"lighteval|bigbench:causal_judgment|0": 0,
"lighteval|bigbench:date_understanding|0": 0,
"lighteval|bigbench:disambiguation_qa|0": 0,
"lighteval|bigbench:geometric_shapes|0": 0,
"lighteval|bigbench:logical_deduction_five_objects|0": 0,
"lighteval|bigbench:logical_deduction_seven_objects|0": 0,
"lighteval|bigbench:logical_deduction_three_objects|0": 0,
"lighteval|bigbench:movie_recommendation|0": 0,
"lighteval|bigbench:navigate|0": 0,
"lighteval|bigbench:reasoning_about_colored_objects|0": 0,
"lighteval|bigbench:ruin_names|0": 0,
"lighteval|bigbench:salient_translation_error_detection|0": 0,
"lighteval|bigbench:snarks|0": 0,
"lighteval|bigbench:sports_understanding|0": 0,
"lighteval|bigbench:temporal_sequences|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0
},
"config_tasks": {
"lighteval|bigbench:causal_judgment": {
"name": "bigbench:causal_judgment",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "causal_judgement",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 190,
"effective_num_docs": 190,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:date_understanding": {
"name": "bigbench:date_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "date_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 369,
"effective_num_docs": 369,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:disambiguation_qa": {
"name": "bigbench:disambiguation_qa",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "disambiguation_qa",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 258,
"effective_num_docs": 258,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:geometric_shapes": {
"name": "bigbench:geometric_shapes",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "geometric_shapes",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 360,
"effective_num_docs": 360,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_five_objects": {
"name": "bigbench:logical_deduction_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_seven_objects": {
"name": "bigbench:logical_deduction_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 700,
"effective_num_docs": 700,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_three_objects": {
"name": "bigbench:logical_deduction_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:movie_recommendation": {
"name": "bigbench:movie_recommendation",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "movie_recommendation",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:navigate": {
"name": "bigbench:navigate",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "navigate",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:reasoning_about_colored_objects": {
"name": "bigbench:reasoning_about_colored_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "reasoning_about_colored_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 2000,
"effective_num_docs": 2000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:ruin_names": {
"name": "bigbench:ruin_names",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "ruin_names",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 448,
"effective_num_docs": 448,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:salient_translation_error_detection": {
"name": "bigbench:salient_translation_error_detection",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "salient_translation_error_detection",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 998,
"effective_num_docs": 998,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:snarks": {
"name": "bigbench:snarks",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "snarks",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 181,
"effective_num_docs": 181,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:sports_understanding": {
"name": "bigbench:sports_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "sports_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:temporal_sequences": {
"name": "bigbench:temporal_sequences",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "temporal_sequences",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects": {
"name": "bigbench:tracking_shuffled_objects_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1250,
"effective_num_docs": 1250,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects": {
"name": "bigbench:tracking_shuffled_objects_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1750,
"effective_num_docs": 1750,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects": {
"name": "bigbench:tracking_shuffled_objects_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
}
},
"summary_tasks": {
"lighteval|bigbench:causal_judgment|0": {
"hashes": {
"hash_examples": "dfb1ae47218f2850",
"hash_full_prompts": "92b1cf75ca896127",
"hash_input_tokens": "3fbebfde3354c6ac",
"hash_cont_tokens": "ac670c3ea513a639"
},
"truncated": 0,
"non_truncated": 190,
"padded": 189,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:date_understanding|0": {
"hashes": {
"hash_examples": "2b823c41500a6ec2",
"hash_full_prompts": "a086589baadb24a5",
"hash_input_tokens": "262880ca53810c51",
"hash_cont_tokens": "e7711b87d7f90d38"
},
"truncated": 0,
"non_truncated": 369,
"padded": 369,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:disambiguation_qa|0": {
"hashes": {
"hash_examples": "2a4c3d41db198cea",
"hash_full_prompts": "407f0b9a565699a7",
"hash_input_tokens": "7159e757a625c662",
"hash_cont_tokens": "de89f8a6e5dac00c"
},
"truncated": 0,
"non_truncated": 258,
"padded": 258,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:geometric_shapes|0": {
"hashes": {
"hash_examples": "24aa261103911b72",
"hash_full_prompts": "c7a3189ee1642ab5",
"hash_input_tokens": "2bbd71a22a0967ff",
"hash_cont_tokens": "e51eec73c3eb26c9"
},
"truncated": 0,
"non_truncated": 360,
"padded": 360,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"hashes": {
"hash_examples": "cb5bdc92afc41f83",
"hash_full_prompts": "199211ceee2c8d60",
"hash_input_tokens": "0c67894c26b553e1",
"hash_cont_tokens": "4c9e9d2d14981c58"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"hashes": {
"hash_examples": "b6805ea696739f9f",
"hash_full_prompts": "38f66304971bdfb0",
"hash_input_tokens": "fe8815876ceefc9f",
"hash_cont_tokens": "1745fa6fd92f0e0d"
},
"truncated": 0,
"non_truncated": 700,
"padded": 700,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "0a9439e283842405",
"hash_input_tokens": "bdaf9a4ad6d5308d",
"hash_cont_tokens": "2b5b679169d7bcf1"
},
"truncated": 0,
"non_truncated": 300,
"padded": 300,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:movie_recommendation|0": {
"hashes": {
"hash_examples": "530cc6f737830f45",
"hash_full_prompts": "e7d59f843d80e6ba",
"hash_input_tokens": "d107040c803cc28a",
"hash_cont_tokens": "be520838bf2427bc"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:navigate|0": {
"hashes": {
"hash_examples": "7962ef85d0058b9a",
"hash_full_prompts": "d58b607419984968",
"hash_input_tokens": "f163e125b820f371",
"hash_cont_tokens": "04e3a57b821a3dd8"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 988,
"non_padded": 12,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"hashes": {
"hash_examples": "39be1ab1677a651d",
"hash_full_prompts": "157347bb0e7fe3f1",
"hash_input_tokens": "562dab78f82dce11",
"hash_cont_tokens": "3fe982d2154a001a"
},
"truncated": 0,
"non_truncated": 2000,
"padded": 1969,
"non_padded": 31,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:ruin_names|0": {
"hashes": {
"hash_examples": "e9b96b31d2154941",
"hash_full_prompts": "9cb73d2fcaf5ee1e",
"hash_input_tokens": "07268ad2915fab08",
"hash_cont_tokens": "046bbbbddb05b429"
},
"truncated": 0,
"non_truncated": 448,
"padded": 442,
"non_padded": 6,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"hashes": {
"hash_examples": "951ac59f7ad0427d",
"hash_full_prompts": "3598ce066bb83298",
"hash_input_tokens": "d514272792c1a43b",
"hash_cont_tokens": "e78fb6d09071e0f6"
},
"truncated": 0,
"non_truncated": 998,
"padded": 998,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:snarks|0": {
"hashes": {
"hash_examples": "3a53eb9b9d758534",
"hash_full_prompts": "fb6c17d84dd479d6",
"hash_input_tokens": "a714129b2023dfb4",
"hash_cont_tokens": "f5cb71a436613293"
},
"truncated": 0,
"non_truncated": 181,
"padded": 178,
"non_padded": 3,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:sports_understanding|0": {
"hashes": {
"hash_examples": "bd65741f00770373",
"hash_full_prompts": "467a508a87ae3ce4",
"hash_input_tokens": "c1c3655837258597",
"hash_cont_tokens": "02230fac16464d15"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:temporal_sequences|0": {
"hashes": {
"hash_examples": "1d13139f47cb2df7",
"hash_full_prompts": "c1b66e439c72477b",
"hash_input_tokens": "d47a810e1658bcc6",
"hash_cont_tokens": "88c86d8bfb960c7d"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"hashes": {
"hash_examples": "8770a702a9646648",
"hash_full_prompts": "8c353eaa84f712ff",
"hash_input_tokens": "e940ffaad712c055",
"hash_cont_tokens": "7cf11d867348e0b1"
},
"truncated": 0,
"non_truncated": 1250,
"padded": 1180,
"non_padded": 70,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"hashes": {
"hash_examples": "b469b7d073824a59",
"hash_full_prompts": "902f5b74467353eb",
"hash_input_tokens": "9d52e492d2f6cd34",
"hash_cont_tokens": "f76ba63a583d749e"
},
"truncated": 0,
"non_truncated": 1750,
"padded": 1701,
"non_padded": 49,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "0a9439e283842405",
"hash_input_tokens": "5867c63542b2b703",
"hash_cont_tokens": "b2cce0a4a2edc859"
},
"truncated": 0,
"non_truncated": 300,
"padded": 294,
"non_padded": 6,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "51a30c4501ba4586",
"hash_full_prompts": "2f8425bef1f1b307",
"hash_input_tokens": "b84ccc3699968295",
"hash_cont_tokens": "2f0ff7c19ccc0d8e"
},
"truncated": 0,
"non_truncated": 13104,
"padded": 12926,
"non_padded": 178,
"num_truncated_few_shots": 0
}
}