lm1-4b2-84b-c4seeds/4b284b21bc4seed4/evaluation/rankeval/4b284b21bc4seed4_5_lm-eval_global_step80108_2023-02-15-11-04-03_5shots_backup.json
{
"results": {
"anli_r1": {
"acc": 0.343,
"acc_stderr": 0.015019206922356951
},
"anli_r2": {
"acc": 0.332,
"acc_stderr": 0.014899597242811478
},
"anli_r3": {
"acc": 0.33166666666666667,
"acc_stderr": 0.01359683672948518
},
"cb": {
"acc": 0.39285714285714285,
"acc_stderr": 0.0658538889806635,
"f1": 0.22512077294685992
},
"copa": {
"acc": 0.79,
"acc_stderr": 0.040936018074033256
},
"hellaswag": {
"acc": 0.477096195976897,
"acc_stderr": 0.00498454354093234,
"acc_norm": 0.6394144592710616,
"acc_norm_stderr": 0.004791890625834213
},
"rte": {
"acc": 0.5306859205776173,
"acc_stderr": 0.030039730592197812
},
"winogrande": {
"acc": 0.590370955011839,
"acc_stderr": 0.013821049109655472
},
"storycloze_2016": {
"acc": 0.7247461250668092,
"acc_stderr": 0.010328538400500572
},
"boolq": {
"acc": 0.5896024464831804,
"acc_stderr": 0.008603488048617521
},
"arc_easy": {
"acc": 0.6308922558922558,
"acc_stderr": 0.009901987410242733,
"acc_norm": 0.6077441077441077,
"acc_norm_stderr": 0.010018744689650043
},
"arc_challenge": {
"acc": 0.3046075085324232,
"acc_stderr": 0.013449522109932487,
"acc_norm": 0.33361774744027306,
"acc_norm_stderr": 0.01377868705417654
},
"sciq": {
"acc": 0.916,
"acc_stderr": 0.008776162089491132,
"acc_norm": 0.909,
"acc_norm_stderr": 0.009099549538400243
},
"piqa": {
"acc": 0.7584330794341676,
"acc_stderr": 0.009986718001804456,
"acc_norm": 0.766050054406964,
"acc_norm_stderr": 0.009877236895137434
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}