task,metric,value,err,version anli_r1,acc,0.306,0.014580006055436967,0 anli_r2,acc,0.359,0.01517726422479859,0 anli_r3,acc,0.35583333333333333,0.01382651874849331,0 arc_challenge,acc,0.2773037542662116,0.013082095839059376,0 arc_challenge,acc_norm,0.3174061433447099,0.01360223908803817,0 arc_easy,acc,0.6430976430976431,0.009830630210347012,0 arc_easy,acc_norm,0.622895622895623,0.00994504194636652,0 boolq,acc,0.634862385321101,0.008420941009417815,1 cb,acc,0.5714285714285714,0.06672848092813058,1 cb,f1,0.5178689064558629,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.47540330611431986,0.004983740145218613,0 hellaswag,acc_norm,0.630551682931687,0.004816690123209743,0 piqa,acc,0.7573449401523396,0.010002002569708698,0 piqa,acc_norm,0.766050054406964,0.00987723689513744,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.906,0.009233052000787733,0 storycloze_2016,acc,0.7258150721539284,0.010316062787590011,0 winogrande,acc,0.5919494869771112,0.013812822643745028,0