{ "results": { "arc_easy": { "acc": 0.4414983164983165, "acc_stderr": 0.010189314382749946, "acc_norm": 0.3968855218855219, "acc_norm_stderr": 0.01003923680058321 }, "boolq": { "acc": 0.5749235474006116, "acc_stderr": 0.008646316159373179 }, "lambada_openai": { "ppl": 94.9912418583616, "ppl_stderr": 3.9681997912266134, "acc": 0.24083058412575198, "acc_stderr": 0.005957132284367873 }, "openbookqa": { "acc": 0.166, "acc_stderr": 0.016656616876531142, "acc_norm": 0.278, "acc_norm_stderr": 0.020055833888070914 }, "piqa": { "acc": 0.5973884657236126, "acc_stderr": 0.011442395233488698, "acc_norm": 0.5914036996735582, "acc_norm_stderr": 0.01146924038724515 }, "winogrande": { "acc": 0.48303078137332284, "acc_stderr": 0.014044390401612976 } }, "versions": { "arc_easy": 0, "boolq": 1, "lambada_openai": 0, "openbookqa": 0, "piqa": 0, "winogrande": 0 }, "config": { "model": "hf-causal-experimental", "model_args": "pretrained=BEE-spoke-data/mega-ar-126m-4k,revision=main,trust_remote_code=True,dtype='float'", "num_fewshot": 0, "batch_size": "4", "batch_sizes": [], "device": "cuda", "no_cache": false, "limit": null, "bootstrap_iters": 100000, "description_dict": {} } }