task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229871,0 anli_r2,acc,0.342,0.01500870618212173,0 anli_r3,acc,0.3383333333333333,0.013664144006618278,0 arc_challenge,acc,0.23890784982935154,0.012461071376316614,0 arc_challenge,acc_norm,0.27986348122866894,0.013119040897725925,0 arc_easy,acc,0.561026936026936,0.010183076012972067,0 arc_easy,acc_norm,0.5084175084175084,0.010258329515226459,0 boolq,acc,0.6119266055045871,0.008523130584760848,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.18803418803418803,,1 copa,acc,0.76,0.04292346959909282,0 hellaswag,acc,0.4357697669786895,0.004948439229523916,0 hellaswag,acc_norm,0.5615415255925115,0.0049518409782196935,0 piqa,acc,0.7328618063112078,0.010323440492612433,0 piqa,acc_norm,0.7426550598476604,0.01019992106479251,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.812,0.012361586015103761,0 sciq,acc_norm,0.729,0.014062601350986186,0 storycloze_2016,acc,0.694815606627472,0.010648664383985661,0 winogrande,acc,0.5682715074980268,0.013920872110010713,0