{
    "results": {
        "anli_r1": {
            "acc": 0.325,
            "acc_stderr": 0.014818724459095524
        },
        "anli_r2": {
            "acc": 0.342,
            "acc_stderr": 0.01500870618212173
        },
        "anli_r3": {
            "acc": 0.31666666666666665,
            "acc_stderr": 0.013434078660827388
        },
        "cb": {
            "acc": 0.4642857142857143,
            "acc_stderr": 0.06724777654937658,
            "f1": 0.3289760348583877
        },
        "copa": {
            "acc": 0.77,
            "acc_stderr": 0.04229525846816506
        },
        "hellaswag": {
            "acc": 0.45160326628161723,
            "acc_stderr": 0.004966351835028203,
            "acc_norm": 0.5888269269069907,
            "acc_norm_stderr": 0.004910409150135493
        },
        "rte": {
            "acc": 0.48375451263537905,
            "acc_stderr": 0.030080573208738064
        },
        "winogrande": {
            "acc": 0.5682715074980268,
            "acc_stderr": 0.01392087211001071
        },
        "storycloze_2016": {
            "acc": 0.6953500801710315,
            "acc_stderr": 0.010643426988646796
        },
        "boolq": {
            "acc": 0.608868501529052,
            "acc_stderr": 0.008535239054221166
        },
        "arc_easy": {
            "acc": 0.6342592592592593,
            "acc_stderr": 0.009882988069418822,
            "acc_norm": 0.6186868686868687,
            "acc_norm_stderr": 0.009966542497171021
        },
        "arc_challenge": {
            "acc": 0.2909556313993174,
            "acc_stderr": 0.013273077865907578,
            "acc_norm": 0.3122866894197952,
            "acc_norm_stderr": 0.013542598541688067
        },
        "sciq": {
            "acc": 0.916,
            "acc_stderr": 0.008776162089491137,
            "acc_norm": 0.915,
            "acc_norm_stderr": 0.008823426366942312
        },
        "piqa": {
            "acc": 0.733949945593036,
            "acc_stderr": 0.010310039263352831,
            "acc_norm": 0.7486398258977149,
            "acc_norm_stderr": 0.010121156016819243
        }
    },
    "versions": {
        "anli_r1": 0,
        "anli_r2": 0,
        "anli_r3": 0,
        "cb": 1,
        "copa": 0,
        "hellaswag": 0,
        "rte": 0,
        "winogrande": 0,
        "storycloze_2016": 0,
        "boolq": 1,
        "arc_easy": 0,
        "arc_challenge": 0,
        "sciq": 0,
        "piqa": 0
    }
}