lm5-2b8-55b-c4
/
evaluation
/rankeval_s_denoiser
/checkpoints_2b855b55bc4ul2ndfixnew_4_lm-eval_global_step52452_2023-02-09-17-38-12_4shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.336, | |
"acc_stderr": 0.014944140233795025 | |
}, | |
"anli_r2": { | |
"acc": 0.353, | |
"acc_stderr": 0.01512017260548369 | |
}, | |
"anli_r3": { | |
"acc": 0.3375, | |
"acc_stderr": 0.01365589718546366 | |
}, | |
"cb": { | |
"acc": 0.5178571428571429, | |
"acc_stderr": 0.06737697508644645, | |
"f1": 0.3362023995826813 | |
}, | |
"copa": { | |
"acc": 0.6, | |
"acc_stderr": 0.04923659639173309 | |
}, | |
"hellaswag": { | |
"acc": 0.2938657637920733, | |
"acc_stderr": 0.004546002255456781, | |
"acc_norm": 0.32204740091615214, | |
"acc_norm_stderr": 0.00466306082837678 | |
}, | |
"rte": { | |
"acc": 0.49097472924187724, | |
"acc_stderr": 0.030091559826331334 | |
}, | |
"winogrande": { | |
"acc": 0.48303078137332284, | |
"acc_stderr": 0.014044390401612969 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5793693212185996, | |
"acc_stderr": 0.011415827994342653 | |
}, | |
"boolq": { | |
"acc": 0.4437308868501529, | |
"acc_stderr": 0.008689501105367405 | |
}, | |
"arc_easy": { | |
"acc": 0.43013468013468015, | |
"acc_stderr": 0.010159130445178514, | |
"acc_norm": 0.39225589225589225, | |
"acc_norm_stderr": 0.010018744689650043 | |
}, | |
"arc_challenge": { | |
"acc": 0.19795221843003413, | |
"acc_stderr": 0.011643990971573395, | |
"acc_norm": 0.23122866894197952, | |
"acc_norm_stderr": 0.012320858834772266 | |
}, | |
"sciq": { | |
"acc": 0.721, | |
"acc_stderr": 0.01419015011761203, | |
"acc_norm": 0.686, | |
"acc_norm_stderr": 0.014683991951087967 | |
}, | |
"piqa": { | |
"acc": 0.6322089227421109, | |
"acc_stderr": 0.011250616646678797, | |
"acc_norm": 0.6240478781284005, | |
"acc_norm_stderr": 0.011301098166895724 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |