lm5-2b8-55b-c4
/
evaluation
/rankeval_x_denoiser
/checkpoints_2b855b55bc4ul2ndfixnew_1_lm-eval_global_step52452_2023-02-09-23-08-31_1shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.359, | |
"acc_stderr": 0.015177264224798594 | |
}, | |
"anli_r2": { | |
"acc": 0.349, | |
"acc_stderr": 0.015080663991563102 | |
}, | |
"anli_r3": { | |
"acc": 0.32, | |
"acc_stderr": 0.01347162092976915 | |
}, | |
"cb": { | |
"acc": 0.39285714285714285, | |
"acc_stderr": 0.0658538889806635, | |
"f1": 0.27365967365967364 | |
}, | |
"copa": { | |
"acc": 0.64, | |
"acc_stderr": 0.048241815132442176 | |
}, | |
"hellaswag": { | |
"acc": 0.2909778928500299, | |
"acc_stderr": 0.004532850566893522, | |
"acc_norm": 0.31955785700059747, | |
"acc_norm_stderr": 0.004653523038369371 | |
}, | |
"rte": { | |
"acc": 0.5523465703971119, | |
"acc_stderr": 0.02993107036293953 | |
}, | |
"winogrande": { | |
"acc": 0.5019731649565904, | |
"acc_stderr": 0.014052376259225632 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5783003741314805, | |
"acc_stderr": 0.011419774841868156 | |
}, | |
"boolq": { | |
"acc": 0.5122324159021406, | |
"acc_stderr": 0.008742437504570405 | |
}, | |
"arc_easy": { | |
"acc": 0.43097643097643096, | |
"acc_stderr": 0.010161552863493744, | |
"acc_norm": 0.3792087542087542, | |
"acc_norm_stderr": 0.00995589166886556 | |
}, | |
"arc_challenge": { | |
"acc": 0.1885665529010239, | |
"acc_stderr": 0.0114308976476758, | |
"acc_norm": 0.2158703071672355, | |
"acc_norm_stderr": 0.012022975360030668 | |
}, | |
"sciq": { | |
"acc": 0.703, | |
"acc_stderr": 0.0144568322948011, | |
"acc_norm": 0.659, | |
"acc_norm_stderr": 0.014998131348402706 | |
}, | |
"piqa": { | |
"acc": 0.6224156692056583, | |
"acc_stderr": 0.011310782787145781, | |
"acc_norm": 0.6158868335146899, | |
"acc_norm_stderr": 0.011348160741479136 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |