lm1-2b8-55b-c4-dedup / evaluation /rankeval /lm1-2b8-55b-nodups_4.csv
Muennighoff's picture
add
874e9d6
task,metric,value,err,version
anli_r1,acc,0.331,0.014888272588203926,0
anli_r2,acc,0.339,0.01497675877162035,0
anli_r3,acc,0.34,0.013680495725767794,0
arc_challenge,acc,0.26109215017064846,0.012835523909473848,0
arc_challenge,acc_norm,0.2858361774744027,0.013203196088537367,0
arc_easy,acc,0.5845959595959596,0.010111869494911517,0
arc_easy,acc_norm,0.5555555555555556,0.01019625483869168,0
boolq,acc,0.48807339449541287,0.008742566760633421,1
cb,acc,0.3392857142857143,0.06384226561930825,1
cb,f1,0.31372797744890774,,1
copa,acc,0.79,0.040936018074033256,0
hellaswag,acc,0.4364668392750448,0.0049493353568818635,0
hellaswag,acc_norm,0.5734913363871739,0.004935587729948866,0
piqa,acc,0.7404787812840044,0.01022793988817392,0
piqa,acc_norm,0.7529923830250272,0.010062268140772625,0
rte,acc,0.48014440433212996,0.0300727231673172,0
sciq,acc,0.857,0.01107581480856704,0
sciq,acc_norm,0.842,0.011539894677559564,0
storycloze_2016,acc,0.7172634954569749,0.010413806486121271,0
winogrande,acc,0.5627466456195738,0.013941393310695922,0