lm1-2b8-55b-c4-dedup / evaluation /rankeval /lm1-2b8-55b-nodups_1.csv
Muennighoff's picture
add
874e9d6
task,metric,value,err,version
anli_r1,acc,0.335,0.014933117490932573,0
anli_r2,acc,0.342,0.015008706182121728,0
anli_r3,acc,0.35,0.013774667009018554,0
arc_challenge,acc,0.2508532423208191,0.012668198621315433,0
arc_challenge,acc_norm,0.26706484641638223,0.012928933196496354,0
arc_easy,acc,0.5572390572390572,0.010192333348394457,0
arc_easy,acc_norm,0.5122053872053872,0.010256726235129021,0
boolq,acc,0.5079510703363914,0.00874394919013925,1
cb,acc,0.30357142857142855,0.06199938655510754,1
cb,f1,0.264400871459695,,1
copa,acc,0.75,0.04351941398892446,0
hellaswag,acc,0.4386576379207329,0.004952087083128898,0
hellaswag,acc_norm,0.5731925911173074,0.004936029827672035,0
piqa,acc,0.749183895538629,0.010113869547069044,0
piqa,acc_norm,0.7529923830250272,0.01006226814077264,0
rte,acc,0.5379061371841155,0.030009848912529113,0
sciq,acc,0.845,0.011450157470799478,0
sciq,acc_norm,0.816,0.012259457340938584,0
storycloze_2016,acc,0.711918760021379,0.01047253701982258,0
winogrande,acc,0.5674822415153907,0.013923911578623839,0