task,metric,value,err,version anli_r1,acc,0.319,0.01474640486547348,0 anli_r2,acc,0.35,0.015090650341444231,0 anli_r3,acc,0.3258333333333333,0.013535422043417455,0 arc_challenge,acc,0.22184300341296928,0.012141659068147884,0 arc_challenge,acc_norm,0.25853242320819114,0.012794553754288684,0 arc_easy,acc,0.4781144781144781,0.010249950427234155,0 arc_easy,acc_norm,0.46254208754208753,0.010230952104570805,0 boolq,acc,0.4437308868501529,0.008689501105367406,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.2796474358974359,,1 copa,acc,0.65,0.047937248544110196,0 hellaswag,acc,0.3988249352718582,0.0048865590087549884,0 hellaswag,acc_norm,0.4953196574387572,0.004989562798280527,0 piqa,acc,0.6953210010881393,0.010738889044325161,0 piqa,acc_norm,0.7018498367791077,0.010672964114008301,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.754,0.013626065817750641,0 sciq,acc_norm,0.714,0.014297146862517908,0 storycloze_2016,acc,0.6509887760555852,0.011022640519108546,0 winogrande,acc,0.5185477505919495,0.014042813708888378,0