{ "results": { "arc_easy": { "acc": 0.43813131313131315, "acc_stderr": 0.010180937100600052, "acc_norm": 0.4019360269360269, "acc_norm_stderr": 0.010060521220920566 }, "boolq": { "acc": 0.617737003058104, "acc_stderr": 0.00849914969044927 }, "lambada_openai": { "ppl": 64.94966274873535, "ppl_stderr": 2.5466406639926897, "acc": 0.26470017465554047, "acc_stderr": 0.006146408462993569 }, "openbookqa": { "acc": 0.166, "acc_stderr": 0.016656616876531142, "acc_norm": 0.28, "acc_norm_stderr": 0.020099950647503237 }, "piqa": { "acc": 0.5973884657236126, "acc_stderr": 0.011442395233488702, "acc_norm": 0.6088139281828074, "acc_norm_stderr": 0.0113862156067287 }, "winogrande": { "acc": 0.5098658247829518, "acc_stderr": 0.014049749833367589 } }, "versions": { "arc_easy": 0, "boolq": 1, "lambada_openai": 0, "openbookqa": 0, "piqa": 0, "winogrande": 0 }, "config": { "model": "hf-causal-experimental", "model_args": "pretrained=BEE-spoke-data/smol_llama-220M-GQA,revision=main,trust_remote_code=True,dtype='bfloat16'", "num_fewshot": 0, "batch_size": "8", "batch_sizes": [], "device": "cuda", "no_cache": false, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }