task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932573,0 anli_r2,acc,0.348,0.01507060460376841,0 anli_r3,acc,0.3616666666666667,0.01387613166312387,0 arc_challenge,acc,0.2781569965870307,0.013094469919538807,0 arc_challenge,acc_norm,0.31313993174061433,0.013552671543623501,0 arc_easy,acc,0.5909090909090909,0.01008877515261579,0 arc_easy,acc_norm,0.5580808080808081,0.010190328123071768,0 boolq,acc,0.6168195718654435,0.008503021391450783,1 cb,acc,0.44642857142857145,0.06703189227942397,1 cb,f1,0.3113026819923372,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.465345548695479,0.004977782217582457,0 hellaswag,acc_norm,0.6089424417446724,0.004869899297734556,0 piqa,acc,0.7464635473340587,0.010150090834551786,0 piqa,acc_norm,0.7540805223068553,0.010047331865625184,0 rte,acc,0.555956678700361,0.029907396333795994,0 sciq,acc,0.845,0.01145015747079947,0 sciq,acc_norm,0.801,0.012631649083099182,0 storycloze_2016,acc,0.7199358631747729,0.010383764993920483,0 winogrande,acc,0.5872138910812944,0.013837060648682101,0