task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456736,0 anli_r2,acc,0.364,0.015222868840522024,0 anli_r3,acc,0.3616666666666667,0.013876131663123877,0 arc_challenge,acc,0.2815699658703072,0.013143376735009022,0 arc_challenge,acc_norm,0.3242320819112628,0.013678810399518822,0 arc_easy,acc,0.625,0.009933992677987828,0 arc_easy,acc_norm,0.6132154882154882,0.009993308355370966,0 boolq,acc,0.6247706422018349,0.008468397820914277,1 cb,acc,0.5535714285714286,0.06703189227942395,1 cb,f1,0.38235294117647056,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.47321250746863175,0.004982615233057104,0 hellaswag,acc_norm,0.6276638119896435,0.0048243930768266064,0 piqa,acc,0.7573449401523396,0.010002002569708698,0 piqa,acc_norm,0.7622415669205659,0.009932525779525492,0 rte,acc,0.5956678700361011,0.029540420517619723,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.902,0.009406619184621219,0 storycloze_2016,acc,0.7140566541956174,0.010449259851345843,0 winogrande,acc,0.5674822415153907,0.013923911578623814,0