task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224479,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.33916666666666667,0.013672343491681815,0 arc_challenge,acc,0.19368600682593856,0.01154842540997854,0 arc_challenge,acc_norm,0.23976109215017063,0.012476304127453947,0 arc_easy,acc,0.45707070707070707,0.01022189756425604,0 arc_easy,acc_norm,0.39941077441077444,0.010050018228742113,0 boolq,acc,0.5403669724770642,0.008716508381476019,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.65,0.0479372485441102,0 hellaswag,acc,0.2916749651463852,0.004536045368404717,0 hellaswag,acc_norm,0.3174666401115316,0.004645393477680678,0 piqa,acc,0.6381936887921654,0.011211397313020373,0 piqa,acc_norm,0.6349292709466812,0.011233021830554834,0 rte,acc,0.5379061371841155,0.03000984891252912,0 sciq,acc,0.757,0.01356964019917743,0 sciq,acc_norm,0.675,0.014818724459095526,0 storycloze_2016,acc,0.5911277391769107,0.011368775493925617,0 winogrande,acc,0.4940805051302289,0.014051500838485807,0