task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928367,0 anli_r2,acc,0.32,0.014758652303574878,0 anli_r3,acc,0.3408333333333333,0.013688600793296939,0 arc_challenge,acc,0.26023890784982934,0.012821930225112566,0 arc_challenge,acc_norm,0.3003412969283277,0.013395909309956997,0 arc_easy,acc,0.6102693602693603,0.010007169391797051,0 arc_easy,acc_norm,0.5538720538720538,0.01020005782876501,0 boolq,acc,0.6180428134556575,0.008497851998427192,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3335687382297552,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.48028281218880703,0.004985900172317698,0 hellaswag,acc_norm,0.6314479187412866,0.004814261966376849,0 piqa,acc,0.7633297062023939,0.009916841655042809,0 piqa,acc_norm,0.7665941240478781,0.009869247889520986,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.853,0.011203415395160336,0 sciq,acc_norm,0.773,0.013253174964763921,0 storycloze_2016,acc,0.7215392838054516,0.010365521460604413,0 winogrande,acc,0.5682715074980268,0.01392087211001071,0