task,metric,value,err,version anli_r1,acc,0.318,0.014734079309311901,0 anli_r2,acc,0.33,0.01487687202745673,0 anli_r3,acc,0.32916666666666666,0.01357080625843363,0 arc_challenge,acc,0.28668941979522183,0.013214986329274776,0 arc_challenge,acc_norm,0.302901023890785,0.013428241573185347,0 arc_easy,acc,0.6136363636363636,0.00999129677815963,0 arc_easy,acc_norm,0.6102693602693603,0.01000716939179705,0 boolq,acc,0.6024464831804281,0.00855952325693682,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.3314669652855543,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.4684325831507668,0.0049798268294007665,0 hellaswag,acc_norm,0.6182035451105358,0.004848341560492134,0 piqa,acc,0.749727965179543,0.010106561880089782,0 piqa,acc_norm,0.7557127312295974,0.01002476517228425,0 rte,acc,0.5595667870036101,0.02988212336311871,0 sciq,acc,0.903,0.009363689373248107,0 sciq,acc_norm,0.886,0.010055103435823335,0 storycloze_2016,acc,0.7172634954569749,0.01041380648612127,0 winogrande,acc,0.5698500394632992,0.013914685094716698,0