task,metric,value,err,version anli_r1,acc,0.343,0.015019206922356951,0 anli_r2,acc,0.35,0.015090650341444231,0 anli_r3,acc,0.3516666666666667,0.013789711695404794,0 arc_challenge,acc,0.25170648464163825,0.012682496334042963,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5778619528619529,0.010134620524592271,0 arc_easy,acc_norm,0.5509259259259259,0.010206428316323365,0 boolq,acc,0.5039755351681957,0.008744778542942208,1 cb,acc,0.35714285714285715,0.06460957383809221,1 cb,f1,0.3445134575569358,,1 copa,acc,0.79,0.04093601807403326,0 hellaswag,acc,0.4389563831905995,0.004952454721934793,0 hellaswag,acc_norm,0.5718980282812188,0.0049379243267425755,0 piqa,acc,0.7421109902067464,0.010206956662056255,0 piqa,acc_norm,0.7529923830250272,0.010062268140772625,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.865,0.010811655372416053,0 sciq,acc_norm,0.845,0.01145015747079946,0 storycloze_2016,acc,0.7156600748262961,0.01043161412866525,0 winogrande,acc,0.5698500394632992,0.013914685094716694,0