task,metric,value,err,version anli_r1,acc,0.326,0.014830507204541044,0 anli_r2,acc,0.344,0.015029633724408947,0 anli_r3,acc,0.35583333333333333,0.013826518748493315,0 arc_challenge,acc,0.31143344709897613,0.013532472099850949,0 arc_challenge,acc_norm,0.32764505119453924,0.013715847940719344,0 arc_easy,acc,0.6418350168350169,0.009838331651451844,0 arc_easy,acc_norm,0.6300505050505051,0.009906656266021155,0 boolq,acc,0.5574923547400612,0.008687051315181374,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.35670453061757407,,1 copa,acc,0.74,0.04408440022768078,0 hellaswag,acc,0.45140410276837284,0.004966158142645416,0 hellaswag,acc_norm,0.5934076877116112,0.004901936511546116,0 piqa,acc,0.7388465723612623,0.010248738649935581,0 piqa,acc_norm,0.750272034820457,0.010099232969867483,0 rte,acc,0.5415162454873647,0.02999253538537331,0 sciq,acc,0.913,0.008916866630745925,0 sciq,acc_norm,0.911,0.009008893392651516,0 storycloze_2016,acc,0.6996258685195083,0.010600915927985026,0 winogrande,acc,0.5698500394632992,0.013914685094716692,0