task,metric,value,err,version anli_r1,acc,0.308,0.014606483127342761,0 anli_r2,acc,0.334,0.014922019523732965,0 anli_r3,acc,0.32166666666666666,0.013490095282989521,0 arc_challenge,acc,0.28668941979522183,0.013214986329274774,0 arc_challenge,acc_norm,0.310580204778157,0.01352229209805305,0 arc_easy,acc,0.6212121212121212,0.009953737656542037,0 arc_easy,acc_norm,0.5963804713804713,0.010067368960348216,0 boolq,acc,0.6039755351681957,0.008553881336813415,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.24493628437290407,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.47122087233618803,0.0049815090992763504,0 hellaswag,acc_norm,0.6280621390161323,0.004823341569605425,0 piqa,acc,0.7540805223068553,0.01004733186562519,0 piqa,acc_norm,0.764961915125136,0.009893146688805315,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.903,0.009363689373248102,0 sciq,acc_norm,0.893,0.009779910359847167,0 storycloze_2016,acc,0.7167290219134153,0.010419760409155363,0 winogrande,acc,0.590370955011839,0.013821049109655478,0