task,metric,value,err,version anli_r1,acc,0.354,0.015129868238451772,0 anli_r2,acc,0.336,0.014944140233795016,0 anli_r3,acc,0.3475,0.013751753243291854,0 arc_challenge,acc,0.2909556313993174,0.01327307786590758,0 arc_challenge,acc_norm,0.3225255972696246,0.01365998089427737,0 arc_easy,acc,0.648989898989899,0.009793703885101045,0 arc_easy,acc_norm,0.6199494949494949,0.009960175831493126,0 boolq,acc,0.6345565749235474,0.008422437370062704,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.2828828828828829,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.4772953594901414,0.004984634285101618,0 hellaswag,acc_norm,0.6366261700856403,0.00479988224849481,0 piqa,acc,0.7568008705114254,0.010009611953858922,0 piqa,acc_norm,0.766050054406964,0.009877236895137437,0 rte,acc,0.5595667870036101,0.029882123363118726,0 sciq,acc,0.92,0.008583336977753653,0 sciq,acc_norm,0.916,0.00877616208949112,0 storycloze_2016,acc,0.7274184927846071,0.010297209765351286,0 winogrande,acc,0.6093133385951065,0.013712536036556667,0