task,metric,value,err,version anli_r1,acc,0.318,0.014734079309311901,0 anli_r2,acc,0.331,0.01488827258820394,0 anli_r3,acc,0.33916666666666667,0.013672343491681808,0 arc_challenge,acc,0.28498293515358364,0.013191348179838793,0 arc_challenge,acc_norm,0.30204778156996587,0.013417519144716422,0 arc_easy,acc,0.6077441077441077,0.010018744689650043,0 arc_easy,acc_norm,0.539983164983165,0.010226927233491506,0 boolq,acc,0.5501529051987768,0.008700950643028798,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.1986111111111111,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.47649870543716394,0.004984266543053125,0 hellaswag,acc_norm,0.6253734315873332,0.004830371317841073,0 piqa,acc,0.7519042437431991,0.010077118315574719,0 piqa,acc_norm,0.7589771490750816,0.009979042717267314,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.849,0.011328165223341671,0 sciq,acc_norm,0.758,0.013550631705555956,0 storycloze_2016,acc,0.7226082308925709,0.010353267472010765,0 winogrande,acc,0.5753749013417522,0.01389189315026423,0