lm1-misc-pile
/
83m20b20b
/evaluation
/lm1-83m-20b-results_lm-eval_global_step37905_2023-01-24-13-53-29_2shots.csv
task,metric,value,err,version
anli_r1,acc,0.327,0.014842213153411249,0
anli_r2,acc,0.332,0.014899597242811483,0
anli_r3,acc,0.3416666666666667,0.013696658778002512,0
arc_challenge,acc,0.17064846416382254,0.010993654168413735,0
arc_challenge,acc_norm,0.2150170648464164,0.01200571763413361,0
arc_easy,acc,0.34595959595959597,0.009760749624427521,0
arc_easy,acc_norm,0.3371212121212121,0.009700146509130083,0
boolq,acc,0.5938837920489297,0.00858951094378741,1
cb,acc,0.44642857142857145,0.06703189227942397,1
cb,f1,0.3083804143126177,,1
copa,acc,0.52,0.050211673156867795,0
hellaswag,acc,0.26329416450906196,0.004395205528158076,0
hellaswag,acc_norm,0.26926906990639315,0.004426734718808876,0
piqa,acc,0.5696409140369967,0.011552114834700509,0
piqa,acc_norm,0.5647442872687704,0.011567608588759421,0
rte,acc,0.49097472924187724,0.030091559826331334,0
sciq,acc,0.628,0.015292149942040577,0
sciq,acc_norm,0.576,0.01563548747140519,0
storycloze_2016,acc,0.5200427578834848,0.011553138977961012,0
winogrande,acc,0.505130228887135,0.014051745961790516,0