task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411237,0 anli_r2,acc,0.322,0.014782913600996667,0 anli_r3,acc,0.3441666666666667,0.013720551062295756,0 arc_challenge,acc,0.29180887372013653,0.013284525292403501,0 arc_challenge,acc_norm,0.3165529010238908,0.01359243151906808,0 arc_easy,acc,0.6287878787878788,0.00991359900184574,0 arc_easy,acc_norm,0.6153198653198653,0.009983171707009008,0 boolq,acc,0.6107033639143731,0.008528016290984541,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.4381559220389805,,1 copa,acc,0.78,0.04163331998932263,0 hellaswag,acc,0.4671380203146783,0.004978992721242829,0 hellaswag,acc_norm,0.6192989444333798,0.004845668799108534,0 piqa,acc,0.7551686615886833,0.010032309105568798,0 piqa,acc_norm,0.7633297062023939,0.009916841655042809,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.902,0.009406619184621223,0 sciq,acc_norm,0.889,0.009938701010583726,0 storycloze_2016,acc,0.7183324425440941,0.010401844358587667,0 winogrande,acc,0.5872138910812944,0.013837060648682089,0