task,metric,value,err,version anli_r1,acc,0.32,0.014758652303574876,0 anli_r2,acc,0.34,0.014987482264363937,0 anli_r3,acc,0.345,0.013728421539454881,0 arc_challenge,acc,0.2525597269624573,0.01269672898020771,0 arc_challenge,acc_norm,0.2790102389078498,0.013106784883601343,0 arc_easy,acc,0.577020202020202,0.010137328382209097,0 arc_easy,acc_norm,0.5538720538720538,0.01020005782876501,0 boolq,acc,0.4853211009174312,0.00874128556866792,1 cb,acc,0.44642857142857145,0.06703189227942397,1 cb,f1,0.42175558247707934,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4334793865763792,0.004945424771611596,0 hellaswag,acc_norm,0.5747858992232623,0.004933650697000599,0 piqa,acc,0.7464635473340587,0.010150090834551786,0 piqa,acc_norm,0.7584330794341676,0.00998671800180446,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.868,0.010709373963528031,0 sciq,acc_norm,0.85,0.011297239823409296,0 storycloze_2016,acc,0.7145911277391769,0.010443395884062106,0 winogrande,acc,0.5619573796369376,0.013944181296470804,0