{
  "results": {
    "anli_r1": {
      "acc": 0.319,
      "acc_stderr": 0.014746404865473474
    },
    "anli_r2": {
      "acc": 0.343,
      "acc_stderr": 0.015019206922356953
    },
    "anli_r3": {
      "acc": 0.3416666666666667,
      "acc_stderr": 0.013696658778002519
    },
    "cb": {
      "acc": 0.5357142857142857,
      "acc_stderr": 0.06724777654937658,
      "f1": 0.3757011576560449
    },
    "copa": {
      "acc": 0.76,
      "acc_stderr": 0.04292346959909282
    },
    "hellaswag": {
      "acc": 0.4500099581756622,
      "acc_stderr": 0.004964779805180658,
      "acc_norm": 0.5928101971718781,
      "acc_norm_stderr": 0.0049030666397619485
    },
    "rte": {
      "acc": 0.51985559566787,
      "acc_stderr": 0.030072723167317177
    },
    "winogrande": {
      "acc": 0.5737963693764798,
      "acc_stderr": 0.013898585965412338
    },
    "storycloze_2016": {
      "acc": 0.6990913949759487,
      "acc_stderr": 0.010606289538707334
    },
    "boolq": {
      "acc": 0.6107033639143731,
      "acc_stderr": 0.00852801629098454
    },
    "arc_easy": {
      "acc": 0.6452020202020202,
      "acc_stderr": 0.009817629113069696,
      "acc_norm": 0.6363636363636364,
      "acc_norm_stderr": 0.009870849346011758
    },
    "arc_challenge": {
      "acc": 0.29180887372013653,
      "acc_stderr": 0.013284525292403503,
      "acc_norm": 0.3174061433447099,
      "acc_norm_stderr": 0.01360223908803817
    },
    "sciq": {
      "acc": 0.923,
      "acc_stderr": 0.008434580140240643,
      "acc_norm": 0.915,
      "acc_norm_stderr": 0.00882342636694233
    },
    "piqa": {
      "acc": 0.7399347116430903,
      "acc_stderr": 0.010234893249061303,
      "acc_norm": 0.7513601741022851,
      "acc_norm_stderr": 0.01008451123429685
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}