lm1-1b1-21b-c4
/
evaluation
/lm1-1b1-21b-c4-results_lm-eval_global_step39672_2022-12-21-12-07-40.json
{ | |
"results": { | |
"copa": { | |
"acc": 0.68, | |
"acc_stderr": 0.046882617226215034 | |
}, | |
"boolq": { | |
"acc": 0.5064220183486239, | |
"acc_stderr": 0.00874433361394033 | |
}, | |
"hellaswag": { | |
"acc": 0.3481378211511651, | |
"acc_stderr": 0.004754063867700179, | |
"acc_norm": 0.4282015534754033, | |
"acc_norm_stderr": 0.004938068627349486 | |
}, | |
"arc_challenge": { | |
"acc": 0.21245733788395904, | |
"acc_stderr": 0.01195348290658295, | |
"acc_norm": 0.25, | |
"acc_norm_stderr": 0.012653835621466646 | |
}, | |
"arc_easy": { | |
"acc": 0.48653198653198654, | |
"acc_stderr": 0.010256060854840756, | |
"acc_norm": 0.43813131313131315, | |
"acc_norm_stderr": 0.010180937100600073 | |
}, | |
"sciq": { | |
"acc": 0.758, | |
"acc_stderr": 0.013550631705555946, | |
"acc_norm": 0.665, | |
"acc_norm_stderr": 0.014933117490932568 | |
}, | |
"winogrande": { | |
"acc": 0.5153906866614049, | |
"acc_stderr": 0.014045826789783666 | |
}, | |
"piqa": { | |
"acc": 0.6985854189336235, | |
"acc_stderr": 0.01070624824275376, | |
"acc_norm": 0.705114254624592, | |
"acc_norm_stderr": 0.010639030620156982 | |
}, | |
"rte": { | |
"acc": 0.5306859205776173, | |
"acc_stderr": 0.03003973059219781 | |
} | |
}, | |
"versions": { | |
"copa": 0, | |
"boolq": 1, | |
"hellaswag": 0, | |
"arc_challenge": 0, | |
"arc_easy": 0, | |
"sciq": 0, | |
"winogrande": 0, | |
"piqa": 0, | |
"rte": 0 | |
} | |
} |