task,metric,value,err,version anli_r1,acc,0.332,0.01489959724281149,0 anli_r2,acc,0.362,0.015204840912919503,0 anli_r3,acc,0.33416666666666667,0.013622434813136788,0 arc_challenge,acc,0.28071672354948807,0.013131238126975578,0 arc_challenge,acc_norm,0.3046075085324232,0.013449522109932489,0 arc_easy,acc,0.6014309764309764,0.010046455400477943,0 arc_easy,acc_norm,0.585016835016835,0.01011038315196114,0 boolq,acc,0.5688073394495413,0.008661853128165595,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.4217687074829932,,1 copa,acc,0.71,0.045604802157206845,0 hellaswag,acc,0.40420235012945627,0.004897340793314381,0 hellaswag,acc_norm,0.5269866560446126,0.004982508198584267,0 piqa,acc,0.7274211099020674,0.010389256803296023,0 piqa,acc_norm,0.7290533188248096,0.010369718937426844,0 rte,acc,0.5776173285198556,0.02973162264649588,0 sciq,acc,0.918,0.008680515615523727,0 sciq,acc_norm,0.908,0.009144376393151098,0 storycloze_2016,acc,0.6675574559059326,0.01089386077834354,0 winogrande,acc,0.5351223362273086,0.014017773120881585,0