lm1-misc-pile
/
1b121b21b
/evaluation
/lm1-1b1-21b-oscar-results_lm-eval_global_step52452_2022-12-28-10-34-30.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.286, | |
"acc_stderr": 0.014297146862517908 | |
}, | |
"anli_r2": { | |
"acc": 0.304, | |
"acc_stderr": 0.01455320568795044 | |
}, | |
"anli_r3": { | |
"acc": 0.3175, | |
"acc_stderr": 0.013443538681348054 | |
}, | |
"cb": { | |
"acc": 0.35714285714285715, | |
"acc_stderr": 0.06460957383809221, | |
"f1": 0.27666815942678014 | |
}, | |
"copa": { | |
"acc": 0.69, | |
"acc_stderr": 0.04648231987117316 | |
}, | |
"hellaswag": { | |
"acc": 0.4907388966341366, | |
"acc_stderr": 0.004988925410522774, | |
"acc_norm": 0.5834495120493925, | |
"acc_norm_stderr": 0.00491979470467327 | |
}, | |
"rte": { | |
"acc": 0.5631768953068592, | |
"acc_stderr": 0.029855247390314945 | |
}, | |
"winogrande": { | |
"acc": 0.5177584846093133, | |
"acc_stderr": 0.014043619596174966 | |
}, | |
"storycloze_2016": { | |
"acc": 0.5879208979155531, | |
"acc_stderr": 0.011382271506935862 | |
}, | |
"boolq": { | |
"acc": 0.6862385321100918, | |
"acc_stderr": 0.008115773046958279 | |
}, | |
"arc_easy": { | |
"acc": 0.5122053872053872, | |
"acc_stderr": 0.010256726235129016, | |
"acc_norm": 0.4877946127946128, | |
"acc_norm_stderr": 0.01025672623512901 | |
}, | |
"arc_challenge": { | |
"acc": 0.26535836177474403, | |
"acc_stderr": 0.012902554762313969, | |
"acc_norm": 0.2909556313993174, | |
"acc_norm_stderr": 0.013273077865907573 | |
}, | |
"sciq": { | |
"acc": 0.905, | |
"acc_stderr": 0.009276910103103324, | |
"acc_norm": 0.872, | |
"acc_norm_stderr": 0.010570133761108658 | |
}, | |
"piqa": { | |
"acc": 0.6605005440696409, | |
"acc_stderr": 0.011048455047173918, | |
"acc_norm": 0.6534276387377584, | |
"acc_norm_stderr": 0.011103020320872166 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |