task,metric,value,err,version anli_r1,acc,0.321,0.014770821817934656,0 anli_r2,acc,0.333,0.014910846164229859,0 anli_r3,acc,0.3475,0.013751753243291852,0 arc_challenge,acc,0.3122866894197952,0.013542598541688064,0 arc_challenge,acc_norm,0.33532423208191126,0.013796182947785566,0 arc_easy,acc,0.6359427609427609,0.009873293392779118,0 arc_easy,acc_norm,0.6325757575757576,0.00989255261621155,0 boolq,acc,0.600611620795107,0.008566178448007833,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.269763077644851,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4523999203345947,0.004967118575905285,0 hellaswag,acc_norm,0.5977892850029874,0.004893418929918259,0 piqa,acc,0.750272034820457,0.010099232969867488,0 piqa,acc_norm,0.7573449401523396,0.010002002569708688,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.926,0.008282064512704159,0 sciq,acc_norm,0.928,0.008178195576218681,0 storycloze_2016,acc,0.711918760021379,0.010472537019822575,0 winogrande,acc,0.5864246250986582,0.013840971763195308,0