task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095524,0 anli_r2,acc,0.322,0.014782913600996678,0 anli_r3,acc,0.335,0.013630871843821476,0 arc_challenge,acc,0.2883959044368601,0.013238394422428164,0 arc_challenge,acc_norm,0.302901023890785,0.013428241573185349,0 arc_easy,acc,0.6203703703703703,0.009958037725468575,0 arc_easy,acc_norm,0.5959595959595959,0.010069061649549549,0 boolq,acc,0.5923547400611621,0.008594580270731615,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.3018475149622691,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4645488946425015,0.004977223485342027,0 hellaswag,acc_norm,0.6139215295757817,0.004858539527872464,0 piqa,acc,0.750272034820457,0.010099232969867486,0 piqa,acc_norm,0.750816104461371,0.01009188277012021,0 rte,acc,0.5595667870036101,0.029882123363118712,0 sciq,acc,0.893,0.009779910359847167,0 sciq,acc_norm,0.879,0.010318210380946088,0 storycloze_2016,acc,0.721004810261892,0.010371620932652793,0 winogrande,acc,0.579321231254933,0.013874526372008327,0