task,metric,value,err,version anli_r1,acc,0.314,0.01468399195108797,0 anli_r2,acc,0.332,0.014899597242811478,0 anli_r3,acc,0.33666666666666667,0.013647602942406401,0 arc_challenge,acc,0.2696245733788396,0.012968040686869142,0 arc_challenge,acc_norm,0.30119453924914674,0.013406741767847627,0 arc_easy,acc,0.6186868686868687,0.009966542497171016,0 arc_easy,acc_norm,0.5349326599326599,0.010234713052723679,0 boolq,acc,0.6162079510703364,0.008505584729104973,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.2689474934663815,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4790878311093408,0.004985415250690911,0 hellaswag,acc_norm,0.6315475004979088,0.0048139910698082634,0 piqa,acc,0.7562568008705114,0.010017199471500619,0 piqa,acc_norm,0.7600652883569097,0.009963625892809545,0 rte,acc,0.5703971119133574,0.02979666882912467,0 sciq,acc,0.86,0.010978183844357801,0 sciq,acc_norm,0.779,0.013127502859696239,0 storycloze_2016,acc,0.7177979690005345,0.010407834479647673,0 winogrande,acc,0.5737963693764798,0.013898585965412338,0