task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229868,0 anli_r2,acc,0.338,0.014965960710224482,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.21416382252559726,0.011988383205966499,0 arc_challenge,acc_norm,0.25597269624573377,0.012753013241244513,0 arc_easy,acc,0.5294612794612794,0.010241957728409686,0 arc_easy,acc_norm,0.45202020202020204,0.010212436978834111,0 boolq,acc,0.6048929663608563,0.008550454248280895,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.19814814814814818,,1 copa,acc,0.68,0.046882617226215034,0 hellaswag,acc,0.3619796853216491,0.00479590828258455,0 hellaswag,acc_norm,0.44632543318064133,0.0049609473885351,0 piqa,acc,0.6980413492927094,0.01071173289158835,0 piqa,acc_norm,0.7040261153427638,0.010650414317148128,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.821,0.012128730605719111,0 sciq,acc_norm,0.711,0.01434171135829618,0 storycloze_2016,acc,0.6504543025120256,0.01102654800403797,0 winogrande,acc,0.5201262825572218,0.014041096664344327,0