task,metric,value,err,version anli_r1,acc,0.351,0.015100563798316402,0 anli_r2,acc,0.352,0.015110404505648668,0 anli_r3,acc,0.36833333333333335,0.013930121355353778,0 arc_challenge,acc,0.2986348122866894,0.013374078615068756,0 arc_challenge,acc_norm,0.3310580204778157,0.013752062419817836,0 arc_easy,acc,0.6456228956228957,0.009815004030251746,0 arc_easy,acc_norm,0.6464646464646465,0.0098097289481515,0 boolq,acc,0.6070336391437309,0.008542335147970564,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3366858237547892,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4567815176259709,0.0049711062650465545,0 hellaswag,acc_norm,0.602370045807608,0.004884079750433882,0 piqa,acc,0.7410228509249184,0.010220966031405609,0 piqa,acc_norm,0.7431991294885746,0.010192864802278042,0 rte,acc,0.5667870036101083,0.02982676408213828,0 sciq,acc,0.922,0.008484573530118581,0 sciq,acc_norm,0.929,0.008125578442487917,0 storycloze_2016,acc,0.7156600748262961,0.010431614128665242,0 winogrande,acc,0.5595895816890292,0.013952330311915603,0