task,metric,value,err,version anli_r1,acc,0.324,0.014806864733738856,0 anli_r2,acc,0.333,0.014910846164229873,0 anli_r3,acc,0.33666666666666667,0.013647602942406401,0 arc_challenge,acc,0.29948805460750855,0.013385021637313565,0 arc_challenge,acc_norm,0.3387372013651877,0.01383056892797433,0 arc_easy,acc,0.6439393939393939,0.00982545460841631,0 arc_easy,acc_norm,0.640993265993266,0.009843424713072174,0 boolq,acc,0.5883792048929664,0.008607357686607963,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.2275946275946276,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4509061939852619,0.004965670398127354,0 hellaswag,acc_norm,0.5998805018920533,0.004889210628907973,0 piqa,acc,0.750816104461371,0.010091882770120216,0 piqa,acc_norm,0.750272034820457,0.010099232969867472,0 rte,acc,0.4584837545126354,0.029992535385373314,0 sciq,acc,0.922,0.008484573530118583,0 sciq,acc_norm,0.93,0.008072494358323499,0 storycloze_2016,acc,0.7081774452164618,0.010512588616199622,0 winogrande,acc,0.5824782951854776,0.013859978264440248,0