lm1-1b1-21b-c4-repetitions
/
evaluation
/1b121b10bc4-results_lm-eval_global_step39672_2022-12-22-07-35-19.json
{ | |
"results": { | |
"copa": { | |
"acc": 0.72, | |
"acc_stderr": 0.04512608598542127 | |
}, | |
"boolq": { | |
"acc": 0.5235474006116208, | |
"acc_stderr": 0.008735351675636605 | |
}, | |
"hellaswag": { | |
"acc": 0.3556064528978291, | |
"acc_stderr": 0.0047771835089498215, | |
"acc_norm": 0.4304919338777136, | |
"acc_norm_stderr": 0.004941331215598556 | |
}, | |
"arc_challenge": { | |
"acc": 0.2175767918088737, | |
"acc_stderr": 0.012057262020972504, | |
"acc_norm": 0.2551194539249147, | |
"acc_norm_stderr": 0.012739038695202104 | |
}, | |
"arc_easy": { | |
"acc": 0.49242424242424243, | |
"acc_stderr": 0.010258605792153321, | |
"acc_norm": 0.43897306397306396, | |
"acc_norm_stderr": 0.010183076012972067 | |
}, | |
"sciq": { | |
"acc": 0.754, | |
"acc_stderr": 0.013626065817750638, | |
"acc_norm": 0.666, | |
"acc_norm_stderr": 0.01492201952373297 | |
}, | |
"winogrande": { | |
"acc": 0.5288082083662194, | |
"acc_stderr": 0.014029141615909615 | |
}, | |
"piqa": { | |
"acc": 0.70620239390642, | |
"acc_stderr": 0.010627574080514797, | |
"acc_norm": 0.7013057671381937, | |
"acc_norm_stderr": 0.010678556398149226 | |
}, | |
"rte": { | |
"acc": 0.5379061371841155, | |
"acc_stderr": 0.030009848912529117 | |
} | |
}, | |
"versions": { | |
"copa": 0, | |
"boolq": 1, | |
"hellaswag": 0, | |
"arc_challenge": 0, | |
"arc_easy": 0, | |
"sciq": 0, | |
"winogrande": 0, | |
"piqa": 0, | |
"rte": 0 | |
} | |
} |