task,metric,value,err,version anli_r1,acc,0.323,0.01479492784334864,0 anli_r2,acc,0.35,0.015090650341444233,0 anli_r3,acc,0.3441666666666667,0.013720551062295755,0 arc_challenge,acc,0.2790102389078498,0.013106784883601336,0 arc_challenge,acc_norm,0.3046075085324232,0.01344952210993249,0 arc_easy,acc,0.5989057239057239,0.01005705110653437,0 arc_easy,acc_norm,0.5425084175084175,0.010222638127749496,0 boolq,acc,0.5886850152905199,0.008606395426309208,1 cb,acc,0.3392857142857143,0.06384226561930825,1 cb,f1,0.26343091936312274,,1 copa,acc,0.72,0.04512608598542127,0 hellaswag,acc,0.475502887870942,0.004983788992681198,0 hellaswag,acc_norm,0.6266679944234216,0.004827006520802888,0 piqa,acc,0.7551686615886833,0.010032309105568793,0 piqa,acc_norm,0.763873775843308,0.009908965890558216,0 rte,acc,0.5631768953068592,0.02985524739031495,0 sciq,acc,0.849,0.011328165223341671,0 sciq,acc_norm,0.757,0.013569640199177451,0 storycloze_2016,acc,0.7156600748262961,0.01043161412866525,0 winogrande,acc,0.5761641673243884,0.013888492389944508,0