task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928364,0 anli_r2,acc,0.338,0.014965960710224487,0 anli_r3,acc,0.3325,0.013605417345710528,0 arc_challenge,acc,0.22525597269624573,0.012207839995407309,0 arc_challenge,acc_norm,0.25853242320819114,0.012794553754288673,0 arc_easy,acc,0.5433501683501684,0.010221149650118182,0 arc_easy,acc_norm,0.523989898989899,0.010247967392742688,0 boolq,acc,0.5577981651376147,0.00868643052611449,1 cb,acc,0.5535714285714286,0.06703189227942395,1 cb,f1,0.3502252252252252,,1 copa,acc,0.68,0.04688261722621505,0 hellaswag,acc,0.3595897231627166,0.004788994060654276,0 hellaswag,acc_norm,0.44911372236606256,0.004963872936857938,0 piqa,acc,0.7083786724700761,0.01060444152742879,0 piqa,acc_norm,0.7007616974972797,0.010684130673134581,0 rte,acc,0.4657039711191336,0.030025579819366426,0 sciq,acc,0.895,0.009698921026024968,0 sciq,acc_norm,0.898,0.00957536880165389,0 storycloze_2016,acc,0.6365579903794762,0.011122841442059708,0 winogrande,acc,0.5224940805051302,0.014038257824059876,0