task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.317,0.014721675438880224,0 anli_r3,acc,0.345,0.01372842153945488,0 arc_challenge,acc,0.2901023890784983,0.013261573677520764,0 arc_challenge,acc_norm,0.318259385665529,0.013611993916971453,0 arc_easy,acc,0.6123737373737373,0.00999730791444761,0 arc_easy,acc_norm,0.5715488215488216,0.010154195733990972,0 boolq,acc,0.6146788990825688,0.008511930879680642,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3566561844863732,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4736108344951205,0.00498282691668715,0 hellaswag,acc_norm,0.6191993626767576,0.004845912857338672,0 piqa,acc,0.7551686615886833,0.010032309105568798,0 piqa,acc_norm,0.7529923830250272,0.010062268140772636,0 rte,acc,0.5740072202166066,0.029764956741777645,0 sciq,acc,0.898,0.00957536880165389,0 sciq,acc_norm,0.875,0.010463483381956722,0 storycloze_2016,acc,0.7124532335649385,0.010466744473098357,0 winogrande,acc,0.5706393054459353,0.013911537499969165,0