lm1-misc-pile/619m22b22b/evaluation/rankeval/lm1-619m-22b-results_lm-eval_global_step41007_2023-01-24-13-57-03_3shots.json
{
"results": {
"anli_r1": {
"acc": 0.326,
"acc_stderr": 0.014830507204541024
},
"anli_r2": {
"acc": 0.358,
"acc_stderr": 0.015167928865407557
},
"anli_r3": {
"acc": 0.3375,
"acc_stderr": 0.013655897185463665
},
"cb": {
"acc": 0.35714285714285715,
"acc_stderr": 0.0646095738380922,
"f1": 0.23561507936507933
},
"copa": {
"acc": 0.69,
"acc_stderr": 0.04648231987117316
},
"hellaswag": {
"acc": 0.30233021310495917,
"acc_stderr": 0.004583289072937737,
"acc_norm": 0.3353913563035252,
"acc_norm_stderr": 0.004711622011148457
},
"rte": {
"acc": 0.5415162454873647,
"acc_stderr": 0.029992535385373314
},
"winogrande": {
"acc": 0.5193370165745856,
"acc_stderr": 0.01404197273371297
},
"storycloze_2016": {
"acc": 0.6002137894174239,
"acc_stderr": 0.011327813397531862
},
"boolq": {
"acc": 0.5299694189602446,
"acc_stderr": 0.0087293318183149
},
"arc_easy": {
"acc": 0.4882154882154882,
"acc_stderr": 0.010256933475911015,
"acc_norm": 0.4671717171717172,
"acc_norm_stderr": 0.010237645778853851
},
"arc_challenge": {
"acc": 0.2226962457337884,
"acc_stderr": 0.012158314774829924,
"acc_norm": 0.25170648464163825,
"acc_norm_stderr": 0.012682496334042961
},
"sciq": {
"acc": 0.834,
"acc_stderr": 0.011772110370812192,
"acc_norm": 0.809,
"acc_norm_stderr": 0.012436787112179486
},
"piqa": {
"acc": 0.6436343852013058,
"acc_stderr": 0.011174109865864703,
"acc_norm": 0.6436343852013058,
"acc_norm_stderr": 0.011174109865864729
}
},
"versions": {
"anli_r1": 0,
"anli_r2": 0,
"anli_r3": 0,
"cb": 1,
"copa": 0,
"hellaswag": 0,
"rte": 0,
"winogrande": 0,
"storycloze_2016": 0,
"boolq": 1,
"arc_easy": 0,
"arc_challenge": 0,
"sciq": 0,
"piqa": 0
}
}