task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932575,0 anli_r2,acc,0.333,0.014910846164229857,0 anli_r3,acc,0.3416666666666667,0.013696658778002512,0 arc_challenge,acc,0.26791808873720135,0.012942030195136432,0 arc_challenge,acc_norm,0.2909556313993174,0.013273077865907581,0 arc_easy,acc,0.61489898989899,0.009985214798737247,0 arc_easy,acc_norm,0.5349326599326599,0.010234713052723684,0 boolq,acc,0.5804281345565749,0.008631175489166722,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.48207528380800635,0.004986573992451681,0 hellaswag,acc_norm,0.6312487552280422,0.004814803098436799,0 piqa,acc,0.7616974972796517,0.009940334245876203,0 piqa,acc_norm,0.7665941240478781,0.009869247889520993,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.833,0.011800434324644586,0 sciq,acc_norm,0.747,0.01375427861358708,0 storycloze_2016,acc,0.7252805986103688,0.010322309878339504,0 winogrande,acc,0.5832675611681136,0.013856250072796318,0