task,metric,value,err,version anli_r1,acc,0.308,0.014606483127342763,0 anli_r2,acc,0.329,0.014865395385928373,0 anli_r3,acc,0.31916666666666665,0.013462309712005124,0 arc_challenge,acc,0.28668941979522183,0.01321498632927477,0 arc_challenge,acc_norm,0.310580204778157,0.013522292098053057,0 arc_easy,acc,0.6308922558922558,0.009901987410242747,0 arc_easy,acc_norm,0.6136363636363636,0.009991296778159615,0 boolq,acc,0.6168195718654435,0.008503021391450788,1 cb,acc,0.5714285714285714,0.06672848092813059,1 cb,f1,0.40095238095238095,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.46634136626170086,0.004978462690966927,0 hellaswag,acc_norm,0.6188010356502689,0.00484688692976345,0 piqa,acc,0.7464635473340587,0.010150090834551788,0 piqa,acc_norm,0.7589771490750816,0.009979042717267314,0 rte,acc,0.5703971119133574,0.02979666882912467,0 sciq,acc,0.915,0.008823426366942331,0 sciq,acc_norm,0.903,0.009363689373248123,0 storycloze_2016,acc,0.7226082308925709,0.010353267472010767,0 winogrande,acc,0.5674822415153907,0.013923911578623837,0