task,metric,value,err,version anli_r1,acc,0.339,0.01497675877162034,0 anli_r2,acc,0.335,0.014933117490932573,0 anli_r3,acc,0.33416666666666667,0.013622434813136783,0 arc_challenge,acc,0.17918088737201365,0.011207045216615674,0 arc_challenge,acc_norm,0.2235494880546075,0.012174896631202614,0 arc_easy,acc,0.4335016835016835,0.010168640625454107,0 arc_easy,acc_norm,0.3846801346801347,0.009983171707009006,0 boolq,acc,0.5938837920489297,0.008589510943787407,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.62,0.04878317312145632,0 hellaswag,acc,0.2951603266281617,0.004551826272978059,0 hellaswag,acc_norm,0.3241386178052181,0.004670955399641126,0 piqa,acc,0.6218715995647442,0.011313980666854535,0 piqa,acc_norm,0.6267682263329706,0.011284653078254898,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.732,0.01401329270272948,0 sciq,acc_norm,0.669,0.01488827258820394,0 storycloze_2016,acc,0.5873864243719936,0.011384472322969045,0 winogrande,acc,0.5059194948697711,0.01405150083848581,0