task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811487,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.2764505119453925,0.013069662474252425,0 arc_challenge,acc_norm,0.2960750853242321,0.013340916085246258,0 arc_easy,acc,0.5963804713804713,0.01006736896034822,0 arc_easy,acc_norm,0.5382996632996633,0.010229639820610512,0 boolq,acc,0.6296636085626911,0.008445882436783665,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.3312277706643904,,1 copa,acc,0.73,0.0446196043338474,0 hellaswag,acc,0.4765982871937861,0.004984313205791442,0 hellaswag,acc_norm,0.6216889065923122,0.004839746491523515,0 piqa,acc,0.750816104461371,0.010091882770120216,0 piqa,acc_norm,0.7589771490750816,0.009979042717267312,0 rte,acc,0.5740072202166066,0.02976495674177765,0 sciq,acc,0.853,0.011203415395160336,0 sciq,acc_norm,0.762,0.013473586661967222,0 storycloze_2016,acc,0.7194013896312133,0.010389809647288816,0 winogrande,acc,0.5714285714285714,0.013908353814606696,0