task,metric,value,err,version anli_r1,acc,0.326,0.014830507204541037,0 anli_r2,acc,0.334,0.014922019523732961,0 anli_r3,acc,0.3258333333333333,0.013535422043417455,0 arc_challenge,acc,0.28242320819112626,0.013155456884097224,0 arc_challenge,acc_norm,0.3097269624573379,0.013512058415238361,0 arc_easy,acc,0.6212121212121212,0.009953737656542037,0 arc_easy,acc_norm,0.5829124579124579,0.010117738967781986,0 boolq,acc,0.6045871559633027,0.008551600109082904,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.33001107419712067,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4678350926110337,0.004979446038824758,0 hellaswag,acc_norm,0.6130252937661821,0.0048606237334611405,0 piqa,acc,0.7453754080522307,0.010164432237060492,0 piqa,acc_norm,0.7595212187159956,0.009971345364651064,0 rte,acc,0.5451263537906137,0.029973636495415255,0 sciq,acc,0.888,0.009977753031397234,0 sciq,acc_norm,0.862,0.010912152632504401,0 storycloze_2016,acc,0.7097808658471406,0.010495529690730063,0 winogrande,acc,0.585635359116022,0.013844846232268563,0