lm1-misc-pile
/
1b58b88b8
/1b58b88b8pile
/evaluation
/lm1-1b5-8b8-results_lm-eval_global_step16765_2023-01-24-13-53-33_2shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.307, | |
"acc_stderr": 0.01459328489285262 | |
}, | |
"anli_r2": { | |
"acc": 0.336, | |
"acc_stderr": 0.014944140233795027 | |
}, | |
"anli_r3": { | |
"acc": 0.3308333333333333, | |
"acc_stderr": 0.013588208070709 | |
}, | |
"cb": { | |
"acc": 0.44642857142857145, | |
"acc_stderr": 0.06703189227942398, | |
"f1": 0.2809560132214594 | |
}, | |
"copa": { | |
"acc": 0.66, | |
"acc_stderr": 0.04760952285695237 | |
}, | |
"hellaswag": { | |
"acc": 0.28540131447918743, | |
"acc_stderr": 0.0045068240943332985, | |
"acc_norm": 0.30790679147580163, | |
"acc_norm_stderr": 0.004606843344517482 | |
}, | |
"rte": { | |
"acc": 0.516245487364621, | |
"acc_stderr": 0.030080573208738064 | |
}, | |
"winogrande": { | |
"acc": 0.5043409629044988, | |
"acc_stderr": 0.014051956064076892 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5660074826296099, | |
"acc_stderr": 0.011461234645182191 | |
}, | |
"boolq": { | |
"acc": 0.5617737003058104, | |
"acc_stderr": 0.008678056241208772 | |
}, | |
"arc_easy": { | |
"acc": 0.45454545454545453, | |
"acc_stderr": 0.010217299762709428, | |
"acc_norm": 0.4305555555555556, | |
"acc_norm_stderr": 0.010160345396860075 | |
}, | |
"arc_challenge": { | |
"acc": 0.20136518771331058, | |
"acc_stderr": 0.011718927477444272, | |
"acc_norm": 0.2440273037542662, | |
"acc_norm_stderr": 0.012551447627856253 | |
}, | |
"sciq": { | |
"acc": 0.798, | |
"acc_stderr": 0.012702651587655139, | |
"acc_norm": 0.766, | |
"acc_norm_stderr": 0.013394902889660009 | |
}, | |
"piqa": { | |
"acc": 0.6077257889009793, | |
"acc_stderr": 0.011391846744072232, | |
"acc_norm": 0.6169749727965179, | |
"acc_norm_stderr": 0.011342081709082845 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |