task,metric,value,err,version anli_r1,acc,0.316,0.014709193056057127,0 anli_r2,acc,0.337,0.014955087918653609,0 anli_r3,acc,0.355,0.0138192490040473,0 arc_challenge,acc,0.27559726962457337,0.013057169655761841,0 arc_challenge,acc_norm,0.30204778156996587,0.013417519144716413,0 arc_easy,acc,0.5896464646464646,0.010093531255765457,0 arc_easy,acc_norm,0.571969696969697,0.01015294331642626,0 boolq,acc,0.5831804281345566,0.008623192108843677,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.25805555555555554,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4627564230233021,0.004975919665116542,0 hellaswag,acc_norm,0.6117307309300936,0.004863603638367434,0 piqa,acc,0.7480957562568009,0.010128421335088683,0 piqa,acc_norm,0.7595212187159956,0.009971345364651066,0 rte,acc,0.5270758122743683,0.0300523034631437,0 sciq,acc,0.834,0.011772110370812184,0 sciq,acc_norm,0.793,0.012818553557843986,0 storycloze_2016,acc,0.711918760021379,0.010472537019822576,0 winogrande,acc,0.5824782951854776,0.013859978264440251,0