lm5-2b8-55b-c4
/
evaluation
/rankeval_r_denoiser
/checkpoints_2b855b55bc4ul2ndfixnew_2_lm-eval_global_step52452_2023-02-09-23-24-23_2shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.311, | |
"acc_stderr": 0.014645596385722695 | |
}, | |
"anli_r2": { | |
"acc": 0.356, | |
"acc_stderr": 0.015149042659306625 | |
}, | |
"anli_r3": { | |
"acc": 0.33666666666666667, | |
"acc_stderr": 0.01364760294240639 | |
}, | |
"cb": { | |
"acc": 0.4642857142857143, | |
"acc_stderr": 0.06724777654937658, | |
"f1": 0.316548463356974 | |
}, | |
"copa": { | |
"acc": 0.63, | |
"acc_stderr": 0.048523658709391 | |
}, | |
"hellaswag": { | |
"acc": 0.29047998406691894, | |
"acc_stderr": 0.004530560646902538, | |
"acc_norm": 0.3179645488946425, | |
"acc_norm_stderr": 0.004647338877642189 | |
}, | |
"rte": { | |
"acc": 0.48736462093862815, | |
"acc_stderr": 0.030086851767188564 | |
}, | |
"winogrande": { | |
"acc": 0.5098658247829518, | |
"acc_stderr": 0.014049749833367596 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5793693212185996, | |
"acc_stderr": 0.011415827994342655 | |
}, | |
"boolq": { | |
"acc": 0.4746177370030581, | |
"acc_stderr": 0.008733779541853504 | |
}, | |
"arc_easy": { | |
"acc": 0.42297979797979796, | |
"acc_stderr": 0.010137328382209104, | |
"acc_norm": 0.39057239057239057, | |
"acc_norm_stderr": 0.010011059112064229 | |
}, | |
"arc_challenge": { | |
"acc": 0.18515358361774745, | |
"acc_stderr": 0.011350774438389699, | |
"acc_norm": 0.22525597269624573, | |
"acc_norm_stderr": 0.01220783999540731 | |
}, | |
"sciq": { | |
"acc": 0.727, | |
"acc_stderr": 0.014095022868717607, | |
"acc_norm": 0.677, | |
"acc_norm_stderr": 0.014794927843348635 | |
}, | |
"piqa": { | |
"acc": 0.6316648531011969, | |
"acc_stderr": 0.011254089354334373, | |
"acc_norm": 0.6294885745375408, | |
"acc_norm_stderr": 0.01126782647544766 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |