task,metric,value,err,version anli_r1,acc,0.321,0.014770821817934645,0 anli_r2,acc,0.332,0.014899597242811482,0 anli_r3,acc,0.3425,0.013704669762934728,0 arc_challenge,acc,0.27474402730375425,0.013044617212771227,0 arc_challenge,acc_norm,0.3037542662116041,0.013438909184778757,0 arc_easy,acc,0.6178451178451179,0.009970747281292436,0 arc_easy,acc_norm,0.601010101010101,0.010048240683798748,0 boolq,acc,0.5532110091743119,0.008695392261996192,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.2736842105263158,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.46873132842063336,0.004980014536539821,0 hellaswag,acc_norm,0.6212905795658236,0.0048407422067181065,0 piqa,acc,0.7557127312295974,0.010024765172284242,0 piqa,acc_norm,0.7600652883569097,0.009963625892809545,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.887,0.010016552866696846,0 sciq,acc_norm,0.876,0.010427498872343973,0 storycloze_2016,acc,0.7156600748262961,0.010431614128665253,0 winogrande,acc,0.5895816890292028,0.013825107120035866,0