task,metric,value,err,version anli_r1,acc,0.318,0.014734079309311901,0 anli_r2,acc,0.34,0.014987482264363937,0 anli_r3,acc,0.3516666666666667,0.013789711695404803,0 arc_challenge,acc,0.2568259385665529,0.0127669237941168,0 arc_challenge,acc_norm,0.295221843003413,0.013329750293382316,0 arc_easy,acc,0.5976430976430976,0.010062244711011525,0 arc_easy,acc_norm,0.5585016835016835,0.010189314382749927,0 boolq,acc,0.5871559633027523,0.008611172430472871,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.3260233918128655,,1 copa,acc,0.74,0.0440844002276808,0 hellaswag,acc,0.44284007169886475,0.004957068377516515,0 hellaswag,acc_norm,0.58105954989046,0.004923772581848488,0 piqa,acc,0.7399347116430903,0.0102348932490613,0 piqa,acc_norm,0.7470076169749728,0.010142888698862453,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.847,0.011389500459665532,0 sciq,acc_norm,0.809,0.012436787112179482,0 storycloze_2016,acc,0.692143238909674,0.010674598158758175,0 winogrande,acc,0.5548539857932123,0.01396766295435549,0