lm1-misc-pile
/
2b246b46b
/evaluation
/lm1-2b2-46b-results_lm-eval_global_step44073_2023-01-24-13-53-38_2shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.328, | |
"acc_stderr": 0.01485384248727033 | |
}, | |
"anli_r2": { | |
"acc": 0.328, | |
"acc_stderr": 0.014853842487270333 | |
}, | |
"anli_r3": { | |
"acc": 0.325, | |
"acc_stderr": 0.013526454480351025 | |
}, | |
"cb": { | |
"acc": 0.4107142857142857, | |
"acc_stderr": 0.0663363415035954, | |
"f1": 0.29069767441860467 | |
}, | |
"copa": { | |
"acc": 0.74, | |
"acc_stderr": 0.0440844002276808 | |
}, | |
"hellaswag": { | |
"acc": 0.351822346146186, | |
"acc_stderr": 0.004765629263643526, | |
"acc_norm": 0.43537143995220073, | |
"acc_norm_stderr": 0.004947922692688831 | |
}, | |
"rte": { | |
"acc": 0.49458483754512633, | |
"acc_stderr": 0.030094698123239966 | |
}, | |
"winogrande": { | |
"acc": 0.5438042620363063, | |
"acc_stderr": 0.013998453610924324 | |
}, | |
"storycloze_2016": { | |
"acc": 0.6424371993586317, | |
"acc_stderr": 0.01108334116882779 | |
}, | |
"boolq": { | |
"acc": 0.4828746177370031, | |
"acc_stderr": 0.008739923994130054 | |
}, | |
"arc_easy": { | |
"acc": 0.5618686868686869, | |
"acc_stderr": 0.010180937100600076, | |
"acc_norm": 0.5441919191919192, | |
"acc_norm_stderr": 0.010219631763437851 | |
}, | |
"arc_challenge": { | |
"acc": 0.25170648464163825, | |
"acc_stderr": 0.012682496334042961, | |
"acc_norm": 0.28924914675767915, | |
"acc_norm_stderr": 0.013250012579393443 | |
}, | |
"sciq": { | |
"acc": 0.902, | |
"acc_stderr": 0.009406619184621224, | |
"acc_norm": 0.901, | |
"acc_norm_stderr": 0.009449248027662732 | |
}, | |
"piqa": { | |
"acc": 0.6822633297062024, | |
"acc_stderr": 0.010863133246569285, | |
"acc_norm": 0.6806311207834603, | |
"acc_norm_stderr": 0.010877964076613742 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |