lm1-4b2-84b-c4seeds
/
4b284b42bc4seed2
/evaluation
/rankeval
/4b284b42bc4seed2_5_lm-eval_global_step80108_2023-02-15-11-04-03_5shots_backup.json
{ | |
"results": { | |
"anli_r1": { | |
"acc": 0.339, | |
"acc_stderr": 0.014976758771620347 | |
}, | |
"anli_r2": { | |
"acc": 0.322, | |
"acc_stderr": 0.014782913600996664 | |
}, | |
"anli_r3": { | |
"acc": 0.35333333333333333, | |
"acc_stderr": 0.013804572162314925 | |
}, | |
"cb": { | |
"acc": 0.375, | |
"acc_stderr": 0.06527912098338669, | |
"f1": 0.25089094796863864 | |
}, | |
"copa": { | |
"acc": 0.76, | |
"acc_stderr": 0.04292346959909283 | |
}, | |
"hellaswag": { | |
"acc": 0.4671380203146783, | |
"acc_stderr": 0.004978992721242829, | |
"acc_norm": 0.6250746863174667, | |
"acc_norm_stderr": 0.004831142570475509 | |
}, | |
"rte": { | |
"acc": 0.5018050541516246, | |
"acc_stderr": 0.030096267148976626 | |
}, | |
"winogrande": { | |
"acc": 0.5832675611681136, | |
"acc_stderr": 0.01385625007279632 | |
}, | |
"storycloze_2016": { | |
"acc": 0.7252805986103688, | |
"acc_stderr": 0.010322309878339502 | |
}, | |
"boolq": { | |
"acc": 0.5290519877675841, | |
"acc_stderr": 0.00873028052845153 | |
}, | |
"arc_easy": { | |
"acc": 0.6195286195286195, | |
"acc_stderr": 0.00996230599205857, | |
"acc_norm": 0.6136363636363636, | |
"acc_norm_stderr": 0.009991296778159615 | |
}, | |
"arc_challenge": { | |
"acc": 0.28498293515358364, | |
"acc_stderr": 0.013191348179838793, | |
"acc_norm": 0.310580204778157, | |
"acc_norm_stderr": 0.01352229209805305 | |
}, | |
"sciq": { | |
"acc": 0.906, | |
"acc_stderr": 0.009233052000787736, | |
"acc_norm": 0.894, | |
"acc_norm_stderr": 0.009739551265785133 | |
}, | |
"piqa": { | |
"acc": 0.7453754080522307, | |
"acc_stderr": 0.01016443223706049, | |
"acc_norm": 0.7595212187159956, | |
"acc_norm_stderr": 0.009971345364651066 | |
} | |
}, | |
"versions": { | |
"anli_r1": 0, | |
"anli_r2": 0, | |
"anli_r3": 0, | |
"cb": 1, | |
"copa": 0, | |
"hellaswag": 0, | |
"rte": 0, | |
"winogrande": 0, | |
"storycloze_2016": 0, | |
"boolq": 1, | |
"arc_easy": 0, | |
"arc_challenge": 0, | |
"sciq": 0, | |
"piqa": 0 | |
} | |
} |