task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.34,0.014987482264363937,0 anli_r3,acc,0.3408333333333333,0.013688600793296939,0 arc_challenge,acc,0.24744027303754265,0.01261035266329267,0 arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0 arc_easy,acc,0.5694444444444444,0.010160345396860075,0 arc_easy,acc_norm,0.5151515151515151,0.010255071794531504,0 boolq,acc,0.5318042813455658,0.008727345583419184,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.1818181818181818,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.4060944035052778,0.004900988997414227,0 hellaswag,acc_norm,0.5160326628161721,0.004987215542259667,0 piqa,acc,0.7219804134929271,0.010453117358332802,0 piqa,acc_norm,0.7247007616974973,0.01042142927736953,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.833,0.011800434324644594,0 sciq,acc_norm,0.754,0.013626065817750636,0 storycloze_2016,acc,0.6739711384286478,0.010839964752045184,0 winogrande,acc,0.5430149960536701,0.01400038676159829,0