{
  "results": {
    "anli_r1": {
      "acc": 0.336,
      "acc_stderr": 0.01494414023379502
    },
    "anli_r2": {
      "acc": 0.315,
      "acc_stderr": 0.014696631960792506
    },
    "anli_r3": {
      "acc": 0.34,
      "acc_stderr": 0.0136804957257678
    },
    "cb": {
      "acc": 0.5357142857142857,
      "acc_stderr": 0.06724777654937658,
      "f1": 0.38181818181818183
    },
    "copa": {
      "acc": 0.74,
      "acc_stderr": 0.04408440022768077
    },
    "hellaswag": {
      "acc": 0.48137821151165106,
      "acc_stderr": 0.004986319587524962,
      "acc_norm": 0.6344353714399522,
      "acc_norm_stderr": 0.004806039039008954
    },
    "rte": {
      "acc": 0.5451263537906137,
      "acc_stderr": 0.029973636495415252
    },
    "winogrande": {
      "acc": 0.5974743488555643,
      "acc_stderr": 0.013782866831703048
    },
    "storycloze_2016": {
      "acc": 0.7044361304115446,
      "acc_stderr": 0.01055177883937378
    },
    "boolq": {
      "acc": 0.5669724770642202,
      "acc_stderr": 0.008666251305518059
    },
    "arc_easy": {
      "acc": 0.6220538720538721,
      "acc_stderr": 0.009949405744045452,
      "acc_norm": 0.5787037037037037,
      "acc_norm_stderr": 0.010131882498193127
    },
    "arc_challenge": {
      "acc": 0.29266211604095566,
      "acc_stderr": 0.01329591610361942,
      "acc_norm": 0.32849829351535836,
      "acc_norm_stderr": 0.013724978465537357
    },
    "sciq": {
      "acc": 0.891,
      "acc_stderr": 0.00985982840703719,
      "acc_norm": 0.871,
      "acc_norm_stderr": 0.010605256784796579
    },
    "piqa": {
      "acc": 0.7551686615886833,
      "acc_stderr": 0.010032309105568788,
      "acc_norm": 0.764961915125136,
      "acc_norm_stderr": 0.009893146688805308
    }
  },
  "versions": {
    "anli_r1": 0,
    "anli_r2": 0,
    "anli_r3": 0,
    "cb": 1,
    "copa": 0,
    "hellaswag": 0,
    "rte": 0,
    "winogrande": 0,
    "storycloze_2016": 0,
    "boolq": 1,
    "arc_easy": 0,
    "arc_challenge": 0,
    "sciq": 0,
    "piqa": 0
  }
}
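
The report uses the `results` / `versions` layout emitted by EleutherAI's lm-evaluation-harness: each task maps to its metrics (`acc`, `acc_stderr`, and, where available, `acc_norm`) plus the task version that was run. A minimal sketch for reading the file, assuming it is saved locally as `evaluation-results.json` (the filename is illustrative, not part of the original report):

```python
# Sketch only: load a results report in the format above and print per-task
# accuracy. "evaluation-results.json" is a placeholder path; substitute the
# actual filename from the repository.
import json

with open("evaluation-results.json") as f:
    report = json.load(f)

for task, metrics in report["results"].items():
    acc = metrics["acc"]
    stderr = metrics["acc_stderr"]
    line = f"{task:20s} acc={acc:.4f} (±{stderr:.4f})"
    if "acc_norm" in metrics:
        line += f"  acc_norm={metrics['acc_norm']:.4f}"
    print(line)
```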