task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203936,0 anli_r2,acc,0.336,0.014944140233795027,0 anli_r3,acc,0.3425,0.013704669762934727,0 arc_challenge,acc,0.2781569965870307,0.013094469919538816,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539426,0 arc_easy,acc,0.609006734006734,0.010012992232540633,0 arc_easy,acc_norm,0.5593434343434344,0.010187264635711991,0 boolq,acc,0.5892966360856269,0.008604460608471413,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.21956970232832299,,1 copa,acc,0.74,0.044084400227680794,0 hellaswag,acc,0.4480183230432185,0.004962742426849887,0 hellaswag,acc_norm,0.5839474208325035,0.0049189510191838875,0 piqa,acc,0.7442872687704026,0.010178690109459857,0 piqa,acc_norm,0.7546245919477693,0.010039831320422386,0 rte,acc,0.5631768953068592,0.029855247390314945,0 sciq,acc,0.865,0.010811655372416053,0 sciq,acc_norm,0.793,0.012818553557843983,0 storycloze_2016,acc,0.6916087653661144,0.010679734445487797,0 winogrande,acc,0.5730071033938438,0.01390187807257506,0