lm1-misc-pile
/
619m22b22b
/evaluation
/rankeval
/lm1-619m-22b-results_lm-eval_global_step41007_2023-01-24-13-57-03_3shots.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.326, | |
"acc_stderr": 0.014830507204541024 | |
}, | |
"anli_r2": { | |
"acc": 0.358, | |
"acc_stderr": 0.015167928865407557 | |
}, | |
"anli_r3": { | |
"acc": 0.3375, | |
"acc_stderr": 0.013655897185463665 | |
}, | |
"cb": { | |
"acc": 0.35714285714285715, | |
"acc_stderr": 0.0646095738380922, | |
"f1": 0.23561507936507933 | |
}, | |
"copa": { | |
"acc": 0.69, | |
"acc_stderr": 0.04648231987117316 | |
}, | |
"hellaswag": { | |
"acc": 0.30233021310495917, | |
"acc_stderr": 0.004583289072937737, | |
"acc_norm": 0.3353913563035252, | |
"acc_norm_stderr": 0.004711622011148457 | |
}, | |
"rte": { | |
"acc": 0.5415162454873647, | |
"acc_stderr": 0.029992535385373314 | |
}, | |
"winogrande": { | |
"acc": 0.5193370165745856, | |
"acc_stderr": 0.01404197273371297 | |
}, | |
"storycloze_2016": { | |
"acc": 0.6002137894174239, | |
"acc_stderr": 0.011327813397531862 | |
}, | |
"boolq": { | |
"acc": 0.5299694189602446, | |
"acc_stderr": 0.0087293318183149 | |
}, | |
"arc_easy": { | |
"acc": 0.4882154882154882, | |
"acc_stderr": 0.010256933475911015, | |
"acc_norm": 0.4671717171717172, | |
"acc_norm_stderr": 0.010237645778853851 | |
}, | |
"arc_challenge": { | |
"acc": 0.2226962457337884, | |
"acc_stderr": 0.012158314774829924, | |
"acc_norm": 0.25170648464163825, | |
"acc_norm_stderr": 0.012682496334042961 | |
}, | |
"sciq": { | |
"acc": 0.834, | |
"acc_stderr": 0.011772110370812192, | |
"acc_norm": 0.809, | |
"acc_norm_stderr": 0.012436787112179486 | |
}, | |
"piqa": { | |
"acc": 0.6436343852013058, | |
"acc_stderr": 0.011174109865864703, | |
"acc_norm": 0.6436343852013058, | |
"acc_norm_stderr": 0.011174109865864729 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |