task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203931,0 anli_r2,acc,0.329,0.014865395385928369,0 anli_r3,acc,0.3416666666666667,0.013696658778002512,0 arc_challenge,acc,0.2909556313993174,0.013273077865907578,0 arc_challenge,acc_norm,0.3165529010238908,0.013592431519068079,0 arc_easy,acc,0.6292087542087542,0.009911292822056925,0 arc_easy,acc_norm,0.6047979797979798,0.01003189405279098,0 boolq,acc,0.6073394495412844,0.008541161248702913,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.2951144094001237,,1 copa,acc,0.74,0.04408440022768079,0 hellaswag,acc,0.4667396932881896,0.0049787293000748915,0 hellaswag,acc_norm,0.6128261302529376,0.004861084534087031,0 piqa,acc,0.7464635473340587,0.01015009083455179,0 piqa,acc_norm,0.7557127312295974,0.01002476517228425,0 rte,acc,0.5306859205776173,0.030039730592197812,0 sciq,acc,0.904,0.009320454434783215,0 sciq,acc_norm,0.881,0.010244215145336666,0 storycloze_2016,acc,0.7194013896312133,0.01038980964728882,0 winogrande,acc,0.5895816890292028,0.013825107120035863,0