{ "config_general": { "model_name": "meta-llama/Llama-2-13b-hf", "model_sha": "db6b8eb1feabb38985fdf785a89895959e944936", "model_size": "24.32 GB", "model_dtype": "torch.float16", "lighteval_sha": "457ac5672c5fdebfd6bc95bb94bda825c148eccf", "num_few_shot_default": 0, "num_fewshot_seeds": 1, "override_batch_size": 1, "max_samples": null, "job_id": "" }, "results": { "harness|drop|3": { "em": 0.0014681208053691276, "em_stderr": 0.00039210421902982666, "f1": 0.0607822986577181, "f1_stderr": 0.0013583957676382913 }, "harness|gsm8k|5": { "acc": 0.10841546626231995, "acc_stderr": 0.008563852506627487 }, "harness|winogrande|5": { "acc": 0.7663772691397001, "acc_stderr": 0.011892194477183524 }, "all": { "em": 0.0014681208053691276, "em_stderr": 0.00039210421902982666, "f1": 0.0607822986577181, "f1_stderr": 0.0013583957676382913, "acc": 0.43739636770101, "acc_stderr": 0.010228023491905505 } }, "versions": { "harness|drop|3": 1, "harness|gsm8k|5": 0, "harness|winogrande|5": 0, "all": 0 }, "config_tasks": { "harness|drop": "LM Harness task", "harness|gsm8k": "LM Harness task", "harness|winogrande": "LM Harness task" }, "summary_tasks": { "harness|drop|3": { "hashes": { "hash_examples": "1d27416e8324e9a3", "hash_full_prompts": "a5513ff9a741b385", "hash_input_tokens": "42076f0efbb50aa6", "hash_cont_tokens": "c9346ec21b7560de" }, "truncated": 3, "non-truncated": 9533, "padded": 0, "non-padded": 9536, "effective_few_shots": 3.0, "num_truncated_few_shots": 0 }, "harness|gsm8k|5": { "hashes": { "hash_examples": "4c0843a5d99bcfdc", "hash_full_prompts": "41d55e83abc0e02d", "hash_input_tokens": "bda342e47b5099b2", "hash_cont_tokens": "32cafa77d8a3f04e" }, "truncated": 0, "non-truncated": 1319, "padded": 0, "non-padded": 1319, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 }, "harness|winogrande|5": { "hashes": { "hash_examples": "aada0a176fd81218", "hash_full_prompts": "c8655cbd12de8409", "hash_input_tokens": "c0bedf98cb040854", "hash_cont_tokens": "f08975ad6f2d5864" }, "truncated": 0, "non-truncated": 2534, "padded": 2432, "non-padded": 102, "effective_few_shots": 5.0, "num_truncated_few_shots": 0 } }, "summary_general": { "hashes": { "hash_examples": "9b4d8993161e637d", "hash_full_prompts": "08215e527b7e60a5", "hash_input_tokens": "a12f3e3c934bd78b", "hash_cont_tokens": "4d8f1e04b1d56e40" }, "total_evaluation_time_secondes": "6066.877633810043", "truncated": 3, "non-truncated": 13386, "padded": 2432, "non-padded": 10957, "num_truncated_few_shots": 0 } }