task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095526,0 anli_r2,acc,0.326,0.014830507204541033,0 anli_r3,acc,0.3475,0.013751753243291854,0 arc_challenge,acc,0.31569965870307165,0.013582571095815293,0 arc_challenge,acc_norm,0.3250853242320819,0.013688147309729124,0 arc_easy,acc,0.6367845117845118,0.009868397136118794,0 arc_easy,acc_norm,0.63510101010101,0.009878157021155649,0 boolq,acc,0.6039755351681957,0.008553881336813412,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3356643356643356,,1 copa,acc,0.72,0.04512608598542129,0 hellaswag,acc,0.4523003385779725,0.004967023435680015,0 hellaswag,acc_norm,0.5990838478390759,0.004890824718530304,0 piqa,acc,0.750816104461371,0.01009188277012022,0 piqa,acc_norm,0.7546245919477693,0.010039831320422386,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.927,0.00823035471524406,0 sciq,acc_norm,0.924,0.008384169266796384,0 storycloze_2016,acc,0.694815606627472,0.010648664383985665,0 winogrande,acc,0.5911602209944752,0.013816954295135686,0