task,metric,value,err,version anli_r1,acc,0.29,0.014356395999905697,0 anli_r2,acc,0.333,0.014910846164229868,0 anli_r3,acc,0.3358333333333333,0.013639261190932873,0 arc_challenge,acc,0.3361774744027304,0.013804855026205756,0 arc_challenge,acc_norm,0.3583617747440273,0.014012883334859868,0 arc_easy,acc,0.6830808080808081,0.009547254611446381,0 arc_easy,acc_norm,0.6654040404040404,0.009682137724327909,0 boolq,acc,0.6584097859327217,0.008294560677768499,1 cb,acc,0.17857142857142858,0.051642771820087224,1 cb,f1,0.1770273087346258,,1 copa,acc,0.83,0.037752516806863715,0 hellaswag,acc,0.5314678350926111,0.004979889597551663,0 hellaswag,acc_norm,0.7079267078271261,0.0045378651714140265,0 piqa,acc,0.7720348204570185,0.00978809383232491,0 piqa,acc_norm,0.7780195865070729,0.00969612074466202,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.943,0.0073351758537068225,0 sciq,acc_norm,0.934,0.007855297938697587,0 storycloze_2016,acc,0.757883484767504,0.009905870033193868,0 winogrande,acc,0.6495659037095501,0.013409047676670187,0