lm1-2b8-55b-c4-dedup / evaluation /rankeval /lm1-2b8-55b-nodups_5.csv
Muennighoff's picture
add
874e9d6
task,metric,value,err,version
anli_r1,acc,0.32,0.014758652303574876,0
anli_r2,acc,0.34,0.014987482264363937,0
anli_r3,acc,0.345,0.013728421539454881,0
arc_challenge,acc,0.2525597269624573,0.01269672898020771,0
arc_challenge,acc_norm,0.2790102389078498,0.013106784883601343,0
arc_easy,acc,0.577020202020202,0.010137328382209097,0
arc_easy,acc_norm,0.5538720538720538,0.01020005782876501,0
boolq,acc,0.4853211009174312,0.00874128556866792,1
cb,acc,0.44642857142857145,0.06703189227942397,1
cb,f1,0.42175558247707934,,1
copa,acc,0.8,0.040201512610368445,0
hellaswag,acc,0.4334793865763792,0.004945424771611596,0
hellaswag,acc_norm,0.5747858992232623,0.004933650697000599,0
piqa,acc,0.7464635473340587,0.010150090834551786,0
piqa,acc_norm,0.7584330794341676,0.00998671800180446,0
rte,acc,0.5342960288808665,0.030025579819366426,0
sciq,acc,0.868,0.010709373963528031,0
sciq,acc_norm,0.85,0.011297239823409296,0
storycloze_2016,acc,0.7145911277391769,0.010443395884062106,0
winogrande,acc,0.5619573796369376,0.013944181296470804,0