lm5-2b8-55b-c4
/
evaluation
/rankeval_s_denoiser_44b
/checkpoints_2b855b55bc4ul2ndfixnew_5_lm-eval_global_step42000_2023-02-08-13-42-29_5shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.353, | |
"acc_stderr": 0.01512017260548369 | |
}, | |
"anli_r2": { | |
"acc": 0.342, | |
"acc_stderr": 0.01500870618212173 | |
}, | |
"anli_r3": { | |
"acc": 0.3575, | |
"acc_stderr": 0.013840921245257794 | |
}, | |
"cb": { | |
"acc": 0.5, | |
"acc_stderr": 0.06741998624632421, | |
"f1": 0.34521263958184845 | |
}, | |
"copa": { | |
"acc": 0.56, | |
"acc_stderr": 0.04988876515698589 | |
}, | |
"hellaswag": { | |
"acc": 0.2891854212308305, | |
"acc_stderr": 0.004524575892952968, | |
"acc_norm": 0.3157737502489544, | |
"acc_norm_stderr": 0.004638733202373885 | |
}, | |
"rte": { | |
"acc": 0.5379061371841155, | |
"acc_stderr": 0.030009848912529117 | |
}, | |
"winogrande": { | |
"acc": 0.5019731649565904, | |
"acc_stderr": 0.014052376259225632 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5793693212185996, | |
"acc_stderr": 0.011415827994342657 | |
}, | |
"boolq": { | |
"acc": 0.5370030581039755, | |
"acc_stderr": 0.008721074177479658 | |
}, | |
"arc_easy": { | |
"acc": 0.4031986531986532, | |
"acc_stderr": 0.010065668576794787, | |
"acc_norm": 0.37457912457912457, | |
"acc_norm_stderr": 0.00993175882041061 | |
}, | |
"arc_challenge": { | |
"acc": 0.189419795221843, | |
"acc_stderr": 0.01145070511591077, | |
"acc_norm": 0.22696245733788395, | |
"acc_norm_stderr": 0.012240491536132873 | |
}, | |
"sciq": { | |
"acc": 0.71, | |
"acc_stderr": 0.01435639599990569, | |
"acc_norm": 0.665, | |
"acc_norm_stderr": 0.014933117490932572 | |
}, | |
"piqa": { | |
"acc": 0.6229597388465724, | |
"acc_stderr": 0.011307569752543902, | |
"acc_norm": 0.6126224156692056, | |
"acc_norm_stderr": 0.011366038083435908 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |