task,metric,value,err,version anli_r1,acc,0.333,0.014910846164229868,0 anli_r2,acc,0.339,0.014976758771620344,0 anli_r3,acc,0.33916666666666667,0.013672343491681819,0 arc_challenge,acc,0.18600682593856654,0.011370940183266749,0 arc_challenge,acc_norm,0.22610921501706485,0.01222420209706328,0 arc_easy,acc,0.42003367003367004,0.010127718838529398,0 arc_easy,acc_norm,0.3728956228956229,0.009922743197129255,0 boolq,acc,0.6051987767584098,0.008549304887647411,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.62,0.04878317312145632,0 hellaswag,acc,0.2949611631149173,0.004550933142528753,0 hellaswag,acc_norm,0.32463652658832903,0.004672819355838551,0 piqa,acc,0.6251360174102285,0.011294565805619017,0 piqa,acc_norm,0.6224156692056583,0.011310782787145772,0 rte,acc,0.5342960288808665,0.030025579819366422,0 sciq,acc,0.735,0.013963164754809949,0 sciq,acc_norm,0.656,0.015029633724408945,0 storycloze_2016,acc,0.5873864243719936,0.011384472322969045,0 winogrande,acc,0.516179952644041,0.014045126130978601,0