task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620342,0 anli_r2,acc,0.323,0.01479492784334864,0 anli_r3,acc,0.3475,0.013751753243291852,0 arc_challenge,acc,0.2551194539249147,0.012739038695202102,0 arc_challenge,acc_norm,0.28498293515358364,0.013191348179838795,0 arc_easy,acc,0.5833333333333334,0.010116282977781239,0 arc_easy,acc_norm,0.5361952861952862,0.010232865550346736,0 boolq,acc,0.5620795107033639,0.008677388652709261,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.2627450980392157,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.4334793865763792,0.0049454247716115935,0 hellaswag,acc_norm,0.5575582553276239,0.0049566093272183885,0 piqa,acc,0.7323177366702938,0.01033011118937043,0 piqa,acc_norm,0.7410228509249184,0.010220966031405617,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.877,0.010391293421849877,0 sciq,acc_norm,0.84,0.011598902298689007,0 storycloze_2016,acc,0.6841261357562801,0.010749892827011111,0 winogrande,acc,0.5509076558800315,0.01397945938914085,0