task,metric,value,err,version anli_r1,acc,0.326,0.014830507204541037,0 anli_r2,acc,0.337,0.014955087918653607,0 anli_r3,acc,0.33416666666666667,0.013622434813136774,0 arc_challenge,acc,0.28071672354948807,0.013131238126975576,0 arc_challenge,acc_norm,0.3037542662116041,0.013438909184778766,0 arc_easy,acc,0.5993265993265994,0.010055304474255573,0 arc_easy,acc_norm,0.5694444444444444,0.010160345396860082,0 boolq,acc,0.5752293577981651,0.008645503833361106,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.26622479977906655,,1 copa,acc,0.81,0.039427724440366234,0 hellaswag,acc,0.4629555865365465,0.004976067726432562,0 hellaswag,acc_norm,0.609838677554272,0.004867893927258165,0 piqa,acc,0.7437431991294886,0.01018578783156506,0 piqa,acc_norm,0.7524483133841132,0.010069703966857116,0 rte,acc,0.5270758122743683,0.0300523034631437,0 sciq,acc,0.844,0.011480235006122363,0 sciq,acc_norm,0.794,0.012795613612786548,0 storycloze_2016,acc,0.7145911277391769,0.010443395884062115,0 winogrande,acc,0.5824782951854776,0.013859978264440246,0