task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928362,0 anli_r2,acc,0.335,0.014933117490932568,0 anli_r3,acc,0.3333333333333333,0.013613950010225606,0 arc_challenge,acc,0.2645051194539249,0.012889272949313366,0 arc_challenge,acc_norm,0.2858361774744027,0.013203196088537367,0 arc_easy,acc,0.5778619528619529,0.010134620524592271,0 arc_easy,acc_norm,0.5357744107744108,0.010233488709726544,0 boolq,acc,0.5165137614678899,0.008740284046486644,1 cb,acc,0.26785714285714285,0.05971290310957635,1 cb,f1,0.24172051976930028,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4371639115714001,0.004950221546187576,0 hellaswag,acc_norm,0.5686118303126867,0.004942578520987359,0 piqa,acc,0.7475516866158868,0.010135665547362364,0 piqa,acc_norm,0.7589771490750816,0.009979042717267312,0 rte,acc,0.5018050541516246,0.030096267148976626,0 sciq,acc,0.866,0.010777762298369683,0 sciq,acc_norm,0.828,0.011939788882495321,0 storycloze_2016,acc,0.7071084981293426,0.010523873293246304,0 winogrande,acc,0.5651144435674822,0.013932814110418029,0