task,metric,value,err,version anli_r1,acc,0.364,0.01522286884052202,0 anli_r2,acc,0.348,0.01507060460376841,0 anli_r3,acc,0.3566666666666667,0.013833742805050715,0 arc_challenge,acc,0.2363481228668942,0.012414960524301842,0 arc_challenge,acc_norm,0.2696245733788396,0.012968040686869148,0 arc_easy,acc,0.4831649831649832,0.010253966261288895,0 arc_easy,acc_norm,0.4734848484848485,0.010245347015573706,0 boolq,acc,0.537308868501529,0.00872067560638845,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.3263888888888889,,1 copa,acc,0.64,0.04824181513244218,0 hellaswag,acc,0.3935471021708823,0.004875379352079819,0 hellaswag,acc_norm,0.4924317864967138,0.00498920977074323,0 piqa,acc,0.7007616974972797,0.01068413067313458,0 piqa,acc_norm,0.6931447225244831,0.010760295070580381,0 rte,acc,0.5667870036101083,0.029826764082138277,0 sciq,acc,0.761,0.013493000446937587,0 sciq,acc_norm,0.749,0.01371813351688892,0 storycloze_2016,acc,0.6493853554249065,0.011034317290463294,0 winogrande,acc,0.5153906866614049,0.01404582678978366,0