lm5-2b8-55b-c4
/
evaluation
/rankeval_r_denoiser
/checkpoints_2b855b55bc4ul2ndfixnew_1_lm-eval_global_step52452_2023-02-09-23-24-23_1shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.36, | |
"acc_stderr": 0.015186527932040122 | |
}, | |
"anli_r2": { | |
"acc": 0.349, | |
"acc_stderr": 0.015080663991563102 | |
}, | |
"anli_r3": { | |
"acc": 0.32166666666666666, | |
"acc_stderr": 0.01349009528298952 | |
}, | |
"cb": { | |
"acc": 0.4107142857142857, | |
"acc_stderr": 0.0663363415035954, | |
"f1": 0.28651292802236195 | |
}, | |
"copa": { | |
"acc": 0.64, | |
"acc_stderr": 0.048241815132442176 | |
}, | |
"hellaswag": { | |
"acc": 0.2913762198765186, | |
"acc_stderr": 0.004534677750102734, | |
"acc_norm": 0.3249352718581956, | |
"acc_norm_stderr": 0.0046739348371504464 | |
}, | |
"rte": { | |
"acc": 0.5523465703971119, | |
"acc_stderr": 0.02993107036293953 | |
}, | |
"winogrande": { | |
"acc": 0.494869771112865, | |
"acc_stderr": 0.014051745961790513 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5793693212185996, | |
"acc_stderr": 0.01141582799434265 | |
}, | |
"boolq": { | |
"acc": 0.5110091743119266, | |
"acc_stderr": 0.008742934884517647 | |
}, | |
"arc_easy": { | |
"acc": 0.4297138047138047, | |
"acc_stderr": 0.010157908005763678, | |
"acc_norm": 0.3792087542087542, | |
"acc_norm_stderr": 0.00995589166886556 | |
}, | |
"arc_challenge": { | |
"acc": 0.1825938566552901, | |
"acc_stderr": 0.011289730684564982, | |
"acc_norm": 0.21928327645051193, | |
"acc_norm_stderr": 0.012091245787615734 | |
}, | |
"sciq": { | |
"acc": 0.705, | |
"acc_stderr": 0.014428554438445517, | |
"acc_norm": 0.658, | |
"acc_norm_stderr": 0.015008706182121731 | |
}, | |
"piqa": { | |
"acc": 0.6218715995647442, | |
"acc_stderr": 0.011313980666854535, | |
"acc_norm": 0.6109902067464635, | |
"acc_norm_stderr": 0.011374774974447464 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |