lm5-2b8-55b-c4
/
evaluation
/rankeval_s_denoiser_44b
/checkpoints_2b855b55bc4ul2ndfixnew_3_lm-eval_global_step42000_2023-02-08-13-42-29_3shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.321, | |
"acc_stderr": 0.01477082181793465 | |
}, | |
"anli_r2": { | |
"acc": 0.343, | |
"acc_stderr": 0.015019206922356953 | |
}, | |
"anli_r3": { | |
"acc": 0.33916666666666667, | |
"acc_stderr": 0.013672343491681822 | |
}, | |
"cb": { | |
"acc": 0.39285714285714285, | |
"acc_stderr": 0.0658538889806635, | |
"f1": 0.2593406593406593 | |
}, | |
"copa": { | |
"acc": 0.62, | |
"acc_stderr": 0.04878317312145633 | |
}, | |
"hellaswag": { | |
"acc": 0.29087831109340767, | |
"acc_stderr": 0.004532393111248679, | |
"acc_norm": 0.3136825333598885, | |
"acc_norm_stderr": 0.004630407476835188 | |
}, | |
"rte": { | |
"acc": 0.5090252707581228, | |
"acc_stderr": 0.030091559826331334 | |
}, | |
"winogrande": { | |
"acc": 0.5146014206787688, | |
"acc_stderr": 0.014046492383275835 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5783003741314805, | |
"acc_stderr": 0.011419774841868156 | |
}, | |
"boolq": { | |
"acc": 0.5495412844036697, | |
"acc_stderr": 0.008702022442950878 | |
}, | |
"arc_easy": { | |
"acc": 0.4090909090909091, | |
"acc_stderr": 0.010088775152615779, | |
"acc_norm": 0.3686868686868687, | |
"acc_norm_stderr": 0.009899640855681038 | |
}, | |
"arc_challenge": { | |
"acc": 0.18771331058020477, | |
"acc_stderr": 0.011411001314155136, | |
"acc_norm": 0.22098976109215018, | |
"acc_norm_stderr": 0.012124929206818258 | |
}, | |
"sciq": { | |
"acc": 0.694, | |
"acc_stderr": 0.014580006055436972, | |
"acc_norm": 0.652, | |
"acc_norm_stderr": 0.015070604603768408 | |
}, | |
"piqa": { | |
"acc": 0.6354733405875952, | |
"acc_stderr": 0.011229456510295966, | |
"acc_norm": 0.6262241566920566, | |
"acc_norm_stderr": 0.011287972563201014 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |