lm5-2b8-55b-c4/evaluation/rankeval_s_denoiser/checkpoints_2b855b55bc4ul2ndfixnew_2_lm-eval_global_step52452_2023-02-09-17-36-57_2shots_backup.json
{
"results": {
"anli_r1": {
"acc": 0.308,
"acc_stderr": 0.014606483127342763
},
"anli_r2": {
"acc": 0.353,
"acc_stderr": 0.015120172605483696
},
"anli_r3": {
"acc": 0.3375,
"acc_stderr": 0.013655897185463653
},
"cb": {
"acc": 0.4642857142857143,
"acc_stderr": 0.06724777654937658,
"f1": 0.316548463356974
},
"copa": {
"acc": 0.61,
"acc_stderr": 0.04902071300001975
},
"hellaswag": {
"acc": 0.2929695279824736,
"acc_stderr": 0.004541944342035901,
"acc_norm": 0.31866162119099783,
"acc_norm_stderr": 0.00465005215009441
},
"rte": {
"acc": 0.47653429602888087,
"acc_stderr": 0.030063300411902652
},
"winogrande": {
"acc": 0.5011838989739542,
"acc_stderr": 0.014052446290529012
},
"storycloze_2016": {
"acc": 0.5836451095670764,
"acc_stderr": 0.011399490926937005
},
"boolq": {
"acc": 0.4776758409785933,
"acc_stderr": 0.00873633411558504
},
"arc_easy": {
"acc": 0.42634680134680136,
"acc_stderr": 0.010147858603835139,
"acc_norm": 0.3926767676767677,
"acc_norm_stderr": 0.010020646555538686
},
"arc_challenge": {
"acc": 0.18344709897610922,
"acc_stderr": 0.011310170179554543,
"acc_norm": 0.22696245733788395,
"acc_norm_stderr": 0.012240491536132879
},
"sciq": {
"acc": 0.73,
"acc_stderr": 0.014046255632633915,
"acc_norm": 0.677,
"acc_norm_stderr": 0.014794927843348633
},
"piqa": {
"acc": 0.6349292709466812,
"acc_stderr": 0.011233021830554829,
"acc_norm": 0.6251360174102285,
"acc_norm_stderr": 0.011294565805619019
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}
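
Below is a minimal sketch (not part of the original results file) of one way to load a results JSON like the one above and print a per-task accuracy summary in Python. The filename, and the assumption that each task entry carries "acc" and "acc_stderr" fields, are illustrative only.

# Sketch: load the lm-eval results JSON above and print task accuracies.
# The path below is assumed; adjust it to wherever the file is stored.
import json

path = "checkpoints_2b855b55bc4ul2ndfixnew_2_lm-eval_global_step52452_2023-02-09-17-36-57_2shots_backup.json"

with open(path) as f:
    data = json.load(f)

for task, metrics in data["results"].items():
    version = data["versions"].get(task, "?")
    acc = metrics.get("acc")
    stderr = metrics.get("acc_stderr")
    # Only print tasks that report an accuracy metric (all tasks above do).
    if acc is not None and stderr is not None:
        print(f"{task:<16} v{version}  acc={acc:.4f} +/- {stderr:.4f}")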