task,metric,value,err,version anli_r1,acc,0.348,0.01507060460376841,0 anli_r2,acc,0.364,0.01522286884052202,0 anli_r3,acc,0.33666666666666667,0.013647602942406389,0 arc_challenge,acc,0.2773037542662116,0.013082095839059374,0 arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0 arc_easy,acc,0.6035353535353535,0.010037412763064526,0 arc_easy,acc_norm,0.5854377104377104,0.010108889212447783,0 boolq,acc,0.6024464831804281,0.008559523256936824,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.2536231884057971,,1 copa,acc,0.81,0.03942772444036622,0 hellaswag,acc,0.4493128858793069,0.0049640758701203404,0 hellaswag,acc_norm,0.5959968133837881,0.0048969523785069215,0 piqa,acc,0.7383025027203483,0.010255630772708232,0 piqa,acc_norm,0.7470076169749728,0.01014288869886245,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.902,0.009406619184621236,0 sciq,acc_norm,0.882,0.01020686926438179,0 storycloze_2016,acc,0.6990913949759487,0.010606289538707344,0 winogrande,acc,0.5556432517758485,0.013965196769083553,0