task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224482,0 anli_r2,acc,0.34,0.014987482264363937,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.26706484641638223,0.012928933196496354,0 arc_challenge,acc_norm,0.28668941979522183,0.013214986329274776,0 arc_easy,acc,0.5955387205387206,0.010070746648278783,0 arc_easy,acc_norm,0.5378787878787878,0.010230299628864806,0 boolq,acc,0.5758409785932722,0.008643869023388128,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.2938907552569367,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.44523003385779725,0.004959754882055468,0 hellaswag,acc_norm,0.5769766978689504,0.004930293787545608,0 piqa,acc,0.7274211099020674,0.010389256803296018,0 piqa,acc_norm,0.7404787812840044,0.010227939888173923,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.843,0.011510146979230196,0 sciq,acc_norm,0.773,0.013253174964763931,0 storycloze_2016,acc,0.6835916622127205,0.01075478009794089,0 winogrande,acc,0.5485398579321231,0.013986110301017762,0