task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229863,0 anli_r2,acc,0.35,0.015090650341444231,0 anli_r3,acc,0.3491666666666667,0.013767075395077249,0 arc_challenge,acc,0.2636518771331058,0.012875929151297056,0 arc_challenge,acc_norm,0.30631399317406144,0.013470584417276513,0 arc_easy,acc,0.5361952861952862,0.010232865550346727,0 arc_easy,acc_norm,0.4823232323232323,0.010253369805698971,0 boolq,acc,0.5504587155963303,0.008700409761350798,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3432539682539683,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.45757817167894843,0.004971789638563322,0 hellaswag,acc_norm,0.5881298546106354,0.004911659884506154,0 piqa,acc,0.7334058759521219,0.010316749863541369,0 piqa,acc_norm,0.733949945593036,0.010310039263352827,0 rte,acc,0.48736462093862815,0.030086851767188564,0 sciq,acc,0.765,0.013414729030247116,0 sciq,acc_norm,0.68,0.014758652303574885,0 storycloze_2016,acc,0.6900053447354356,0.010695042806212553,0 winogrande,acc,0.5580110497237569,0.01395758407910899,0