task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203934,0 anli_r2,acc,0.352,0.015110404505648668,0 anli_r3,acc,0.35583333333333333,0.01382651874849331,0 arc_challenge,acc,0.32081911262798635,0.013640943091946526,0 arc_challenge,acc_norm,0.33447098976109213,0.013787460322441374,0 arc_easy,acc,0.6637205387205387,0.009694178072725206,0 arc_easy,acc_norm,0.5896464646464646,0.010093531255765452,0 boolq,acc,0.6284403669724771,0.008451598145076575,1 cb,acc,0.2857142857142857,0.06091449038731724,1 cb,f1,0.1717171717171717,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.5319657438757219,0.004979573765575866,0 hellaswag,acc_norm,0.7045409281019717,0.004553164013379556,0 piqa,acc,0.7731229597388466,0.009771584259215172,0 piqa,acc_norm,0.7829162132752993,0.009618708415756788,0 rte,acc,0.5667870036101083,0.029826764082138277,0 sciq,acc,0.89,0.00989939381972444,0 sciq,acc_norm,0.815,0.012285191326386684,0 storycloze_2016,acc,0.7525387493319081,0.009979234591920141,0 winogrande,acc,0.6243093922651933,0.013611257508380437,0