Muennighoff's picture
Add eval
9e5d26a
raw
history blame
No virus
2.46 kB
{
"results": {
"anli_r1": {
"acc": 0.335,
"acc_stderr": 0.014933117490932575
},
"anli_r2": {
"acc": 0.337,
"acc_stderr": 0.014955087918653605
},
"anli_r3": {
"acc": 0.33916666666666667,
"acc_stderr": 0.013672343491681819
},
"cb": {
"acc": 0.4107142857142857,
"acc_stderr": 0.0663363415035954,
"f1": 0.1940928270042194
},
"copa": {
"acc": 0.77,
"acc_stderr": 0.04229525846816506
},
"hellaswag": {
"acc": 0.44911372236606256,
"acc_stderr": 0.004963872936857941,
"acc_norm": 0.5824536944831706,
"acc_norm_stderr": 0.004921466591335056
},
"rte": {
"acc": 0.5812274368231047,
"acc_stderr": 0.02969666108123482
},
"winogrande": {
"acc": 0.5698500394632992,
"acc_stderr": 0.013914685094716694
},
"storycloze_2016": {
"acc": 0.7071084981293426,
"acc_stderr": 0.010523873293246305
},
"boolq": {
"acc": 0.5642201834862385,
"acc_stderr": 0.00867262173201595
},
"arc_easy": {
"acc": 0.6069023569023569,
"acc_stderr": 0.010022540618945312,
"acc_norm": 0.5526094276094277,
"acc_norm_stderr": 0.01020283238541565
},
"arc_challenge": {
"acc": 0.2738907849829352,
"acc_stderr": 0.013032004972989501,
"acc_norm": 0.2935153583617747,
"acc_norm_stderr": 0.013307250444941117
},
"sciq": {
"acc": 0.869,
"acc_stderr": 0.010674874844837952,
"acc_norm": 0.797,
"acc_norm_stderr": 0.012726073744598285
},
"piqa": {
"acc": 0.7421109902067464,
"acc_stderr": 0.010206956662056262,
"acc_norm": 0.7470076169749728,
"acc_norm_stderr": 0.010142888698862455
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}