task,metric,value,err,version anli_r1,acc,0.315,0.014696631960792508,0 anli_r2,acc,0.343,0.015019206922356951,0 anli_r3,acc,0.33166666666666667,0.013596836729485168,0 arc_challenge,acc,0.26109215017064846,0.012835523909473841,0 arc_challenge,acc_norm,0.3054607508532423,0.013460080478002505,0 arc_easy,acc,0.6005892255892256,0.010050018228742127,0 arc_easy,acc_norm,0.5875420875420876,0.010101305447864764,0 boolq,acc,0.6061162079510704,0.00854583579261498,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3416488477072939,,1 copa,acc,0.68,0.04688261722621504,0 hellaswag,acc,0.40659231228838877,0.004901936511546122,0 hellaswag,acc_norm,0.5274845648277235,0.004982237133409149,0 piqa,acc,0.721436343852013,0.010459397235965175,0 piqa,acc_norm,0.7252448313384113,0.010415033676676051,0 rte,acc,0.4657039711191336,0.030025579819366426,0 sciq,acc,0.915,0.008823426366942328,0 sciq,acc_norm,0.911,0.009008893392651532,0 storycloze_2016,acc,0.6734366648850882,0.010844543793668893,0 winogrande,acc,0.5556432517758485,0.013965196769083555,0