task,metric,value,err,version anli_r1,acc,0.344,0.015029633724408943,0 anli_r2,acc,0.374,0.015308767369006356,0 anli_r3,acc,0.34833333333333333,0.01375943749887407,0 arc_challenge,acc,0.302901023890785,0.013428241573185349,0 arc_challenge,acc_norm,0.3242320819112628,0.01367881039951882,0 arc_easy,acc,0.6262626262626263,0.009927267058259625,0 arc_easy,acc_norm,0.61489898989899,0.009985214798737251,0 boolq,acc,0.6168195718654435,0.008503021391450788,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.19999999999999998,,1 copa,acc,0.77,0.04229525846816505,0 hellaswag,acc,0.4715196176060546,0.004981680090303699,0 hellaswag,acc_norm,0.6269667396932882,0.004826224784850446,0 piqa,acc,0.7529923830250272,0.010062268140772624,0 piqa,acc_norm,0.7616974972796517,0.009940334245876219,0 rte,acc,0.48736462093862815,0.030086851767188564,0 sciq,acc,0.912,0.008963053962592078,0 sciq,acc_norm,0.896,0.009658016218524301,0 storycloze_2016,acc,0.7327632282202031,0.010233145255103061,0 winogrande,acc,0.5864246250986582,0.013840971763195304,0