task,metric,value,err,version anli_r1,acc,0.319,0.014746404865473486,0 anli_r2,acc,0.33,0.01487687202745673,0 anli_r3,acc,0.32,0.013471620929769142,0 arc_challenge,acc,0.33276450511945393,0.01376986304619231,0 arc_challenge,acc_norm,0.3643344709897611,0.014063260279882415,0 arc_easy,acc,0.6784511784511784,0.00958409157564062,0 arc_easy,acc_norm,0.6607744107744108,0.00971491720776585,0 boolq,acc,0.6590214067278287,0.00829097981816109,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.3049446887911502,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.5259908384783908,0.004983035420235712,0 hellaswag,acc_norm,0.7002589125672177,0.004572081656965643,0 piqa,acc,0.7780195865070729,0.009696120744662019,0 piqa,acc_norm,0.7872687704026116,0.009548223123047352,0 rte,acc,0.5812274368231047,0.02969666108123484,0 sciq,acc,0.93,0.008072494358323499,0 sciq,acc_norm,0.91,0.009054390204866442,0 storycloze_2016,acc,0.7616247995724211,0.009853267441685421,0 winogrande,acc,0.6266771902131019,0.013594002763035516,0