task,metric,value,err,version anli_r1,acc,0.336,0.014944140233795023,0 anli_r2,acc,0.326,0.014830507204541037,0 anli_r3,acc,0.33,0.013579531277800922,0 arc_challenge,acc,0.2721843003412969,0.013006600406423707,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539422,0 arc_easy,acc,0.5854377104377104,0.010108889212447769,0 arc_easy,acc_norm,0.5723905723905723,0.010151683397430677,0 boolq,acc,0.57217125382263,0.008653474894637182,1 cb,acc,0.25,0.058387420812114225,1 cb,f1,0.2095321637426901,,1 copa,acc,0.72,0.045126085985421276,0 hellaswag,acc,0.4051981676956781,0.004899270310557984,0 hellaswag,acc_norm,0.5231029675363473,0.004984452002563928,0 piqa,acc,0.721436343852013,0.010459397235965182,0 piqa,acc_norm,0.719260065288357,0.010484325438311827,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.891,0.009859828407037188,0 sciq,acc_norm,0.883,0.010169287802713327,0 storycloze_2016,acc,0.6632816675574559,0.010928525619392455,0 winogrande,acc,0.5469613259668509,0.013990366632148104,0