task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.34,0.014987482264363935,0 anli_r3,acc,0.33416666666666667,0.013622434813136772,0 arc_challenge,acc,0.2832764505119454,0.013167478735134576,0 arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0 arc_easy,acc,0.6241582491582491,0.009938436373170635,0 arc_easy,acc_norm,0.5951178451178452,0.0100724239603957,0 boolq,acc,0.618348623853211,0.008496550741178258,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.2654970760233918,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.4720175263891655,0.004981961097590805,0 hellaswag,acc_norm,0.6224855606452898,0.004837744647345714,0 piqa,acc,0.7595212187159956,0.009971345364651076,0 piqa,acc_norm,0.7671381936887922,0.009861236071080753,0 rte,acc,0.592057761732852,0.029581952519606186,0 sciq,acc,0.902,0.009406619184621235,0 sciq,acc_norm,0.885,0.01009340759490463,0 storycloze_2016,acc,0.7161945483698557,0.01042569627973092,0 winogrande,acc,0.5603788476716653,0.01394964977601569,0