task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.327,0.014842213153411242,0 anli_r3,acc,0.33416666666666667,0.013622434813136783,0 arc_challenge,acc,0.27303754266211605,0.013019332762635746,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5951178451178452,0.010072423960395703,0 arc_easy,acc_norm,0.561026936026936,0.01018307601297206,0 boolq,acc,0.5813455657492355,0.008628545022868549,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.32523809523809527,,1 copa,acc,0.74,0.04408440022768079,0 hellaswag,acc,0.4448317068313085,0.00495931519801116,0 hellaswag,acc_norm,0.578370842461661,0.004928105880776072,0 piqa,acc,0.7285092491838956,0.010376251176596137,0 piqa,acc_norm,0.735582154515778,0.01028978724476716,0 rte,acc,0.5451263537906137,0.029973636495415255,0 sciq,acc,0.862,0.0109121526325044,0 sciq,acc_norm,0.828,0.011939788882495321,0 storycloze_2016,acc,0.7033671833244255,0.01056281918156322,0 winogrande,acc,0.5588003157063931,0.013954975072834731,0