task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932575,0 anli_r2,acc,0.326,0.014830507204541033,0 anli_r3,acc,0.3441666666666667,0.013720551062295756,0 arc_challenge,acc,0.2713310580204778,0.012993807727545794,0 arc_challenge,acc_norm,0.302901023890785,0.013428241573185349,0 arc_easy,acc,0.6132154882154882,0.009993308355370968,0 arc_easy,acc_norm,0.5774410774410774,0.010135978222981071,0 boolq,acc,0.5685015290519878,0.008662594569027316,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.28917378917378916,,1 copa,acc,0.78,0.04163331998932262,0 hellaswag,acc,0.4714200358494324,0.00498162329219619,0 hellaswag,acc_norm,0.6203943437562238,0.004842969887794082,0 piqa,acc,0.7464635473340587,0.010150090834551794,0 piqa,acc_norm,0.749183895538629,0.010113869547069046,0 rte,acc,0.51985559566787,0.030072723167317177,0 sciq,acc,0.868,0.010709373963528012,0 sciq,acc_norm,0.841,0.0115694793682713,0 storycloze_2016,acc,0.7049706039551042,0.010546232606962283,0 winogrande,acc,0.5682715074980268,0.01392087211001071,0