task,metric,value,err,version anli_r1,acc,0.326,0.01483050720454105,0 anli_r2,acc,0.317,0.014721675438880226,0 anli_r3,acc,0.33416666666666667,0.013622434813136783,0 arc_challenge,acc,0.2815699658703072,0.013143376735009031,0 arc_challenge,acc_norm,0.3199658703071672,0.013631345807016191,0 arc_easy,acc,0.6035353535353535,0.010037412763064526,0 arc_easy,acc_norm,0.5782828282828283,0.010133255284012327,0 boolq,acc,0.6162079510703364,0.008505584729104967,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.24002574002573998,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.468034256124278,0.0049795737655758555,0 hellaswag,acc_norm,0.6201951802429795,0.004843462545943488,0 piqa,acc,0.73449401523395,0.010303308653024429,0 piqa,acc_norm,0.7459194776931447,0.01015727199913505,0 rte,acc,0.5379061371841155,0.030009848912529113,0 sciq,acc,0.875,0.010463483381956722,0 sciq,acc_norm,0.861,0.010945263761042968,0 storycloze_2016,acc,0.703901656867985,0.010557307688475116,0 winogrande,acc,0.5824782951854776,0.013859978264440253,0