lewtun's picture
lewtun HF staff
Upload eval_results/HuggingFaceH4/zephyr-7b-gemma-v0.1/main/bbh/results_2024-03-28T16-39-37.888825.json with huggingface_hub
5834ead verified
raw
history blame
27 kB
{
"config_general": {
"lighteval_sha": "?",
"num_fewshot_seeds": 1,
"override_batch_size": 1,
"max_samples": null,
"job_id": "",
"start_time": 1617058.486235754,
"end_time": 1617285.004213936,
"total_evaluation_time_secondes": "226.51797818182968",
"model_name": "HuggingFaceH4/zephyr-7b-gemma-v0.1",
"model_sha": "03b3427d0ed07d2e0f86c0a7e53d82d4beef9540",
"model_dtype": "torch.bfloat16",
"model_size": "15.9 GB",
"config": null
},
"results": {
"lighteval|bigbench:causal_judgment|0": {
"acc": 0.5736842105263158,
"acc_stderr": 0.03597255252302466
},
"lighteval|bigbench:date_understanding|0": {
"acc": 0.9132791327913279,
"acc_stderr": 0.014670322600929677
},
"lighteval|bigbench:disambiguation_qa|0": {
"acc": 0.375968992248062,
"acc_stderr": 0.03021430003361904
},
"lighteval|bigbench:geometric_shapes|0": {
"acc": 0.28888888888888886,
"acc_stderr": 0.02392141840275226
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"acc": 0.236,
"acc_stderr": 0.019008699622084718
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"acc": 0.16142857142857142,
"acc_stderr": 0.01391623149183487
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"acc": 0.36666666666666664,
"acc_stderr": 0.027868673283383924
},
"lighteval|bigbench:movie_recommendation|0": {
"acc": 0.424,
"acc_stderr": 0.022122993778135404
},
"lighteval|bigbench:navigate|0": {
"acc": 0.554,
"acc_stderr": 0.015726771166750357
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"acc": 0.2305,
"acc_stderr": 0.009419619929689796
},
"lighteval|bigbench:ruin_names|0": {
"acc": 0.328125,
"acc_stderr": 0.0222080353262888
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"acc": 0.3106212424849699,
"acc_stderr": 0.014655375337428495
},
"lighteval|bigbench:snarks|0": {
"acc": 0.39779005524861877,
"acc_stderr": 0.03648082656181033
},
"lighteval|bigbench:sports_understanding|0": {
"acc": 0.676,
"acc_stderr": 0.014806864733738863
},
"lighteval|bigbench:temporal_sequences|0": {
"acc": 0.97,
"acc_stderr": 0.005397140829099201
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"acc": 0.1352,
"acc_stderr": 0.009675311743084085
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"acc": 0.07485714285714286,
"acc_stderr": 0.006292541939446618
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"acc": 0.36666666666666664,
"acc_stderr": 0.027868673283383924
},
"lighteval|bigbench:_average|0": {
"acc": 0.41020425387817944,
"acc_stderr": 0.019457019588138058
},
"all": {
"acc": 0.41020425387817944,
"acc_stderr": 0.019457019588138058
}
},
"versions": {
"lighteval|bigbench:causal_judgment|0": 0,
"lighteval|bigbench:date_understanding|0": 0,
"lighteval|bigbench:disambiguation_qa|0": 0,
"lighteval|bigbench:geometric_shapes|0": 0,
"lighteval|bigbench:logical_deduction_five_objects|0": 0,
"lighteval|bigbench:logical_deduction_seven_objects|0": 0,
"lighteval|bigbench:logical_deduction_three_objects|0": 0,
"lighteval|bigbench:movie_recommendation|0": 0,
"lighteval|bigbench:navigate|0": 0,
"lighteval|bigbench:reasoning_about_colored_objects|0": 0,
"lighteval|bigbench:ruin_names|0": 0,
"lighteval|bigbench:salient_translation_error_detection|0": 0,
"lighteval|bigbench:snarks|0": 0,
"lighteval|bigbench:sports_understanding|0": 0,
"lighteval|bigbench:temporal_sequences|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": 0,
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": 0
},
"config_tasks": {
"lighteval|bigbench:causal_judgment": {
"name": "bigbench:causal_judgment",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "causal_judgement",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 190,
"effective_num_docs": 190,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:date_understanding": {
"name": "bigbench:date_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "date_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 369,
"effective_num_docs": 369,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:disambiguation_qa": {
"name": "bigbench:disambiguation_qa",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "disambiguation_qa",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 258,
"effective_num_docs": 258,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:geometric_shapes": {
"name": "bigbench:geometric_shapes",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "geometric_shapes",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 360,
"effective_num_docs": 360,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_five_objects": {
"name": "bigbench:logical_deduction_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_seven_objects": {
"name": "bigbench:logical_deduction_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 700,
"effective_num_docs": 700,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:logical_deduction_three_objects": {
"name": "bigbench:logical_deduction_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "logical_deduction_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:movie_recommendation": {
"name": "bigbench:movie_recommendation",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "movie_recommendation",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 500,
"effective_num_docs": 500,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:navigate": {
"name": "bigbench:navigate",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "navigate",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:reasoning_about_colored_objects": {
"name": "bigbench:reasoning_about_colored_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "reasoning_about_colored_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 2000,
"effective_num_docs": 2000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:ruin_names": {
"name": "bigbench:ruin_names",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "ruin_names",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 448,
"effective_num_docs": 448,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:salient_translation_error_detection": {
"name": "bigbench:salient_translation_error_detection",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "salient_translation_error_detection",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 998,
"effective_num_docs": 998,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:snarks": {
"name": "bigbench:snarks",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "snarks",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 181,
"effective_num_docs": 181,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:sports_understanding": {
"name": "bigbench:sports_understanding",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "sports_understanding",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:temporal_sequences": {
"name": "bigbench:temporal_sequences",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "temporal_sequences",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1000,
"effective_num_docs": 1000,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects": {
"name": "bigbench:tracking_shuffled_objects_five_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_five_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1250,
"effective_num_docs": 1250,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects": {
"name": "bigbench:tracking_shuffled_objects_seven_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_seven_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 1750,
"effective_num_docs": 1750,
"trust_dataset": true,
"must_remove_duplicate_docs": null
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects": {
"name": "bigbench:tracking_shuffled_objects_three_objects",
"prompt_function": "bbh_lighteval",
"hf_repo": "lighteval/bbh",
"hf_subset": "tracking_shuffled_objects_three_objects",
"metric": [
"loglikelihood_acc_single_token"
],
"hf_avail_splits": [
"train"
],
"evaluation_splits": [
"train"
],
"few_shots_split": null,
"few_shots_select": null,
"generation_size": -1,
"stop_sequence": [
"</s>",
"Q:",
"\n\n"
],
"output_regex": null,
"frozen": false,
"suite": [
"lighteval"
],
"original_num_docs": 300,
"effective_num_docs": 300,
"trust_dataset": true,
"must_remove_duplicate_docs": null
}
},
"summary_tasks": {
"lighteval|bigbench:causal_judgment|0": {
"hashes": {
"hash_examples": "dfb1ae47218f2850",
"hash_full_prompts": "96935a8fcd3f9515",
"hash_input_tokens": "98879f9169ad156e",
"hash_cont_tokens": "27baf866ca0ebad0"
},
"truncated": 0,
"non_truncated": 190,
"padded": 189,
"non_padded": 1,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:date_understanding|0": {
"hashes": {
"hash_examples": "2b823c41500a6ec2",
"hash_full_prompts": "d5a29467e95c3208",
"hash_input_tokens": "076f752b3135e00e",
"hash_cont_tokens": "8c6fe215ff8c91d4"
},
"truncated": 0,
"non_truncated": 369,
"padded": 369,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:disambiguation_qa|0": {
"hashes": {
"hash_examples": "2a4c3d41db198cea",
"hash_full_prompts": "aa28de94e5bd7a3f",
"hash_input_tokens": "dffce622f6eb1685",
"hash_cont_tokens": "839a73804d2ac3d8"
},
"truncated": 0,
"non_truncated": 258,
"padded": 258,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:geometric_shapes|0": {
"hashes": {
"hash_examples": "24aa261103911b72",
"hash_full_prompts": "3e4345bcb0c1f4dc",
"hash_input_tokens": "c81abf22142e3782",
"hash_cont_tokens": "a23d1c0a4e96b0e1"
},
"truncated": 0,
"non_truncated": 360,
"padded": 360,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_five_objects|0": {
"hashes": {
"hash_examples": "cb5bdc92afc41f83",
"hash_full_prompts": "76ce5753dd1dc32c",
"hash_input_tokens": "e852b9bc22c5385b",
"hash_cont_tokens": "5bea7ce607cd4f12"
},
"truncated": 0,
"non_truncated": 500,
"padded": 485,
"non_padded": 15,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_seven_objects|0": {
"hashes": {
"hash_examples": "b6805ea696739f9f",
"hash_full_prompts": "51905bd1825bc92a",
"hash_input_tokens": "1094dcc48a593518",
"hash_cont_tokens": "c26a5f3833e0da24"
},
"truncated": 0,
"non_truncated": 700,
"padded": 691,
"non_padded": 9,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:logical_deduction_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "6be9d3a3909eda4a",
"hash_input_tokens": "27b53b79cffd2b93",
"hash_cont_tokens": "2483967357a945e8"
},
"truncated": 0,
"non_truncated": 300,
"padded": 300,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:movie_recommendation|0": {
"hashes": {
"hash_examples": "530cc6f737830f45",
"hash_full_prompts": "c0ae8ffebb5204fe",
"hash_input_tokens": "0818e3fafec036fa",
"hash_cont_tokens": "a3f0156898f538b9"
},
"truncated": 0,
"non_truncated": 500,
"padded": 500,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:navigate|0": {
"hashes": {
"hash_examples": "7962ef85d0058b9a",
"hash_full_prompts": "a63d7578685f36a0",
"hash_input_tokens": "ae0d8932ad6420bc",
"hash_cont_tokens": "20ec5040db6c6f31"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:reasoning_about_colored_objects|0": {
"hashes": {
"hash_examples": "39be1ab1677a651d",
"hash_full_prompts": "0f808fd212774a7d",
"hash_input_tokens": "419cbfb1f346bc2e",
"hash_cont_tokens": "44d8e0f62d7df58b"
},
"truncated": 0,
"non_truncated": 2000,
"padded": 1991,
"non_padded": 9,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:ruin_names|0": {
"hashes": {
"hash_examples": "e9b96b31d2154941",
"hash_full_prompts": "19ecafe4f1431e27",
"hash_input_tokens": "1bdda06915e334f8",
"hash_cont_tokens": "0b596f99363eb5c3"
},
"truncated": 0,
"non_truncated": 448,
"padded": 448,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:salient_translation_error_detection|0": {
"hashes": {
"hash_examples": "951ac59f7ad0427d",
"hash_full_prompts": "8e7e342eacd2cf2e",
"hash_input_tokens": "ac2cbbeffd4c169d",
"hash_cont_tokens": "1d52dbd3c6e8935e"
},
"truncated": 0,
"non_truncated": 998,
"padded": 998,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:snarks|0": {
"hashes": {
"hash_examples": "3a53eb9b9d758534",
"hash_full_prompts": "71517dc2cd13adcc",
"hash_input_tokens": "389e4a552eab329a",
"hash_cont_tokens": "6857aed0db0c3070"
},
"truncated": 0,
"non_truncated": 181,
"padded": 181,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:sports_understanding|0": {
"hashes": {
"hash_examples": "bd65741f00770373",
"hash_full_prompts": "3cbb59c4ba3fcabd",
"hash_input_tokens": "03f3e2dcf5910e8e",
"hash_cont_tokens": "d64733203fab197c"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 1000,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:temporal_sequences|0": {
"hashes": {
"hash_examples": "1d13139f47cb2df7",
"hash_full_prompts": "edd2cff7265c0608",
"hash_input_tokens": "955b4072cc9c7a2f",
"hash_cont_tokens": "652b87f3997f644f"
},
"truncated": 0,
"non_truncated": 1000,
"padded": 989,
"non_padded": 11,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_five_objects|0": {
"hashes": {
"hash_examples": "8770a702a9646648",
"hash_full_prompts": "5d8c9095784e4a9c",
"hash_input_tokens": "9d05d4f80660877d",
"hash_cont_tokens": "c94830adf3f9436e"
},
"truncated": 0,
"non_truncated": 1250,
"padded": 1240,
"non_padded": 10,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_seven_objects|0": {
"hashes": {
"hash_examples": "b469b7d073824a59",
"hash_full_prompts": "83243fc25edaa8cb",
"hash_input_tokens": "2ef5a68a26314a27",
"hash_cont_tokens": "7c7ab1ca5cf9687d"
},
"truncated": 0,
"non_truncated": 1750,
"padded": 1750,
"non_padded": 0,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
},
"lighteval|bigbench:tracking_shuffled_objects_three_objects|0": {
"hashes": {
"hash_examples": "0509e5712ab9bcdb",
"hash_full_prompts": "6be9d3a3909eda4a",
"hash_input_tokens": "322bc0f7babbf45c",
"hash_cont_tokens": "a3fa73aac58469cc"
},
"truncated": 0,
"non_truncated": 300,
"padded": 294,
"non_padded": 6,
"effective_few_shots": 0.0,
"num_truncated_few_shots": 0
}
},
"summary_general": {
"hashes": {
"hash_examples": "51a30c4501ba4586",
"hash_full_prompts": "dbba586b60b739e4",
"hash_input_tokens": "9b30ece4d16ef51e",
"hash_cont_tokens": "529126904ad14905"
},
"truncated": 0,
"non_truncated": 13104,
"padded": 13043,
"non_padded": 61,
"num_truncated_few_shots": 0
}
}