lm1-4b2-84b-c4seeds
/
4b284b21bc4seed4
/evaluation
/rankeval
/4b284b21bc4seed4_5_lm-eval_global_step80108_2023-02-15-11-04-03_5shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.343, | |
"acc_stderr": 0.015019206922356951 | |
}, | |
"anli_r2": { | |
"acc": 0.332, | |
"acc_stderr": 0.014899597242811478 | |
}, | |
"anli_r3": { | |
"acc": 0.33166666666666667, | |
"acc_stderr": 0.01359683672948518 | |
}, | |
"cb": { | |
"acc": 0.39285714285714285, | |
"acc_stderr": 0.0658538889806635, | |
"f1": 0.22512077294685992 | |
}, | |
"copa": { | |
"acc": 0.79, | |
"acc_stderr": 0.040936018074033256 | |
}, | |
"hellaswag": { | |
"acc": 0.477096195976897, | |
"acc_stderr": 0.00498454354093234, | |
"acc_norm": 0.6394144592710616, | |
"acc_norm_stderr": 0.004791890625834213 | |
}, | |
"rte": { | |
"acc": 0.5306859205776173, | |
"acc_stderr": 0.030039730592197812 | |
}, | |
"winogrande": { | |
"acc": 0.590370955011839, | |
"acc_stderr": 0.013821049109655472 | |
}, | |
"storycloze_2016": { | |
"acc": 0.7247461250668092, | |
"acc_stderr": 0.010328538400500572 | |
}, | |
"boolq": { | |
"acc": 0.5896024464831804, | |
"acc_stderr": 0.008603488048617521 | |
}, | |
"arc_easy": { | |
"acc": 0.6308922558922558, | |
"acc_stderr": 0.009901987410242733, | |
"acc_norm": 0.6077441077441077, | |
"acc_norm_stderr": 0.010018744689650043 | |
}, | |
"arc_challenge": { | |
"acc": 0.3046075085324232, | |
"acc_stderr": 0.013449522109932487, | |
"acc_norm": 0.33361774744027306, | |
"acc_norm_stderr": 0.01377868705417654 | |
}, | |
"sciq": { | |
"acc": 0.916, | |
"acc_stderr": 0.008776162089491132, | |
"acc_norm": 0.909, | |
"acc_norm_stderr": 0.009099549538400243 | |
}, | |
"piqa": { | |
"acc": 0.7584330794341676, | |
"acc_stderr": 0.009986718001804456, | |
"acc_norm": 0.766050054406964, | |
"acc_norm_stderr": 0.009877236895137434 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |