lm1-2b8-55b-c4-dedup / evaluation /rankeval /lm1-2b8-55b-nodups_2.csv
Muennighoff's picture
add
874e9d6
task,metric,value,err,version
anli_r1,acc,0.329,0.014865395385928362,0
anli_r2,acc,0.335,0.014933117490932568,0
anli_r3,acc,0.3333333333333333,0.013613950010225606,0
arc_challenge,acc,0.2645051194539249,0.012889272949313366,0
arc_challenge,acc_norm,0.2858361774744027,0.013203196088537367,0
arc_easy,acc,0.5778619528619529,0.010134620524592271,0
arc_easy,acc_norm,0.5357744107744108,0.010233488709726544,0
boolq,acc,0.5165137614678899,0.008740284046486644,1
cb,acc,0.26785714285714285,0.05971290310957635,1
cb,f1,0.24172051976930028,,1
copa,acc,0.79,0.040936018074033256,0
hellaswag,acc,0.4371639115714001,0.004950221546187576,0
hellaswag,acc_norm,0.5686118303126867,0.004942578520987359,0
piqa,acc,0.7475516866158868,0.010135665547362364,0
piqa,acc_norm,0.7589771490750816,0.009979042717267312,0
rte,acc,0.5018050541516246,0.030096267148976626,0
sciq,acc,0.866,0.010777762298369683,0
sciq,acc_norm,0.828,0.011939788882495321,0
storycloze_2016,acc,0.7071084981293426,0.010523873293246304,0
winogrande,acc,0.5651144435674822,0.013932814110418029,0