task,metric,value,err,version anli_r1,acc,0.342,0.015008706182121728,0 anli_r2,acc,0.323,0.014794927843348637,0 anli_r3,acc,0.3358333333333333,0.013639261190932882,0 arc_challenge,acc,0.2619453924914676,0.012849054826858112,0 arc_challenge,acc_norm,0.30716723549488056,0.013481034054980943,0 arc_easy,acc,0.5837542087542088,0.010114819404500867,0 arc_easy,acc_norm,0.5521885521885522,0.010203742451111525,0 boolq,acc,0.6,0.008568368985904962,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.26656990807934206,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4715196176060546,0.004981680090303701,0 hellaswag,acc_norm,0.6190997809201354,0.004846156699486671,0 piqa,acc,0.7470076169749728,0.01014288869886246,0 piqa,acc_norm,0.7573449401523396,0.01000200256970869,0 rte,acc,0.5523465703971119,0.029931070362939526,0 sciq,acc,0.846,0.011419913065098708,0 sciq,acc_norm,0.806,0.012510816141264368,0 storycloze_2016,acc,0.703901656867985,0.010557307688475123,0 winogrande,acc,0.5753749013417522,0.013891893150264224,0