task,metric,value,err,version anli_r1,acc,0.334,0.014922019523732977,0 anli_r2,acc,0.351,0.015100563798316407,0 anli_r3,acc,0.32083333333333336,0.013480882752851543,0 arc_challenge,acc,0.3003412969283277,0.013395909309956995,0 arc_challenge,acc_norm,0.3225255972696246,0.013659980894277373,0 arc_easy,acc,0.6258417508417509,0.009929516948977627,0 arc_easy,acc_norm,0.6022727272727273,0.010042861602178058,0 boolq,acc,0.6073394495412844,0.008541161248702913,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.2943692088382039,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.472814180442143,0.004982400368939667,0 hellaswag,acc_norm,0.6304521011750648,0.00481695881772609,0 piqa,acc,0.7513601741022851,0.010084511234296852,0 piqa,acc_norm,0.7687704026115343,0.009837063180625336,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.901,0.009449248027662734,0 storycloze_2016,acc,0.7188669160876536,0.0103958360916281,0 winogrande,acc,0.5911602209944752,0.0138169542951357,0