task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653598,0 anli_r2,acc,0.34,0.014987482264363935,0 anli_r3,acc,0.36,0.013862183574189902,0 arc_challenge,acc,0.2363481228668942,0.012414960524301846,0 arc_challenge,acc_norm,0.26791808873720135,0.01294203019513644,0 arc_easy,acc,0.49747474747474746,0.010259652668783469,0 arc_easy,acc_norm,0.45202020202020204,0.010212436978834111,0 boolq,acc,0.4801223241590214,0.00873814151648864,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.35972850678733037,,1 copa,acc,0.67,0.047258156262526066,0 hellaswag,acc,0.3857797251543517,0.004857840934549165,0 hellaswag,acc_norm,0.4766978689504083,0.004984359669951926,0 piqa,acc,0.705114254624592,0.010639030620157003,0 piqa,acc_norm,0.6985854189336235,0.01070624824275376,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.717,0.014251810906481754,0 sciq,acc_norm,0.646,0.015129868238451773,0 storycloze_2016,acc,0.6568679850347408,0.010978648097499879,0 winogrande,acc,0.5193370165745856,0.01404197273371297,0