task,metric,value,err,version anli_r1,acc,0.334,0.014922019523732963,0 anli_r2,acc,0.333,0.014910846164229868,0 anli_r3,acc,0.3491666666666667,0.013767075395077247,0 arc_challenge,acc,0.29266211604095566,0.013295916103619411,0 arc_challenge,acc_norm,0.3225255972696246,0.01365998089427737,0 arc_easy,acc,0.6212121212121212,0.009953737656542035,0 arc_easy,acc_norm,0.5833333333333334,0.010116282977781254,0 boolq,acc,0.599388379204893,0.008570545612096374,1 cb,acc,0.35714285714285715,0.0646095738380922,1 cb,f1,0.23179160021265285,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.48088030272854015,0.004986131919673967,0 hellaswag,acc_norm,0.630053774148576,0.004818031396138917,0 piqa,acc,0.7529923830250272,0.01006226814077262,0 piqa,acc_norm,0.7627856365614799,0.009924694933586374,0 rte,acc,0.5667870036101083,0.029826764082138277,0 sciq,acc,0.887,0.010016552866696848,0 sciq,acc_norm,0.876,0.01042749887234396,0 storycloze_2016,acc,0.7204703367183325,0.01037770209970486,0 winogrande,acc,0.5951065509076559,0.013795927003124939,0