lm1-1b1-21b-c4-repetitions
/
evaluation
/1b121b4bc4-results_lm-eval_global_step39672_2022-12-22-07-35-31.json
{ | |
"results": { | |
"copa": { | |
"acc": 0.67, | |
"acc_stderr": 0.04725815626252607 | |
}, | |
"boolq": { | |
"acc": 0.5788990825688073, | |
"acc_stderr": 0.008635491562221343 | |
}, | |
"hellaswag": { | |
"acc": 0.3541127265484963, | |
"acc_stderr": 0.00477266165962884, | |
"acc_norm": 0.43089026090420235, | |
"acc_norm_stderr": 0.004941887610849026 | |
}, | |
"arc_challenge": { | |
"acc": 0.21160409556313994, | |
"acc_stderr": 0.011935916358632859, | |
"acc_norm": 0.2551194539249147, | |
"acc_norm_stderr": 0.012739038695202105 | |
}, | |
"arc_easy": { | |
"acc": 0.49242424242424243, | |
"acc_stderr": 0.01025860579215332, | |
"acc_norm": 0.4318181818181818, | |
"acc_norm_stderr": 0.010163945352271709 | |
}, | |
"sciq": { | |
"acc": 0.75, | |
"acc_stderr": 0.013699915608779773, | |
"acc_norm": 0.654, | |
"acc_norm_stderr": 0.015050266127564448 | |
}, | |
"winogrande": { | |
"acc": 0.505130228887135, | |
"acc_stderr": 0.01405174596179052 | |
}, | |
"piqa": { | |
"acc": 0.6991294885745375, | |
"acc_stderr": 0.010700745724145973, | |
"acc_norm": 0.7002176278563657, | |
"acc_norm_stderr": 0.010689686967138092 | |
}, | |
"rte": { | |
"acc": 0.5379061371841155, | |
"acc_stderr": 0.030009848912529117 | |
} | |
}, | |
"versions": { | |
"copa": 0, | |
"boolq": 1, | |
"hellaswag": 0, | |
"arc_challenge": 0, | |
"arc_easy": 0, | |
"sciq": 0, | |
"winogrande": 0, | |
"piqa": 0, | |
"rte": 0 | |
} | |
} |