task,metric,value,err,version anli_r1,acc,0.324,0.014806864733738857,0 anli_r2,acc,0.338,0.014965960710224498,0 anli_r3,acc,0.3525,0.013797164918918362,0 arc_challenge,acc,0.2841296928327645,0.013179442447653887,0 arc_challenge,acc_norm,0.3037542662116041,0.01343890918477876,0 arc_easy,acc,0.6043771043771043,0.010033741393430983,0 arc_easy,acc_norm,0.5749158249158249,0.010143966195717845,0 boolq,acc,0.5730886850152905,0.008651119069643816,1 cb,acc,0.42857142857142855,0.06672848092813057,1 cb,f1,0.25882352941176473,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.45907189802828124,0.004973036453863711,0 hellaswag,acc_norm,0.6099382593108943,0.004867670042866713,0 piqa,acc,0.7480957562568009,0.010128421335088683,0 piqa,acc_norm,0.7573449401523396,0.01000200256970869,0 rte,acc,0.5234657039711191,0.030063300411902652,0 sciq,acc,0.836,0.011715000693181331,0 sciq,acc_norm,0.791,0.012864077288499337,0 storycloze_2016,acc,0.7151256012827365,0.010437513986611718,0 winogrande,acc,0.5777426992896606,0.013881582030658552,0