task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653607,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.335,0.013630871843821474,0 arc_challenge,acc,0.23122866894197952,0.012320858834772273,0 arc_challenge,acc_norm,0.2636518771331058,0.012875929151297065,0 arc_easy,acc,0.5454545454545454,0.010217299762709419,0 arc_easy,acc_norm,0.5374579124579124,0.010230952104570801,0 boolq,acc,0.5657492354740061,0.008669116184243044,1 cb,acc,0.5892857142857143,0.06633634150359538,1 cb,f1,0.365874363327674,,1 copa,acc,0.71,0.04560480215720684,0 hellaswag,acc,0.36217884883489343,0.004796478664403837,0 hellaswag,acc_norm,0.4455287791276638,0.0049600825288524325,0 piqa,acc,0.705114254624592,0.010639030620156998,0 piqa,acc_norm,0.6974972796517954,0.010717199698083898,0 rte,acc,0.5667870036101083,0.029826764082138267,0 sciq,acc,0.9,0.009491579957525057,0 sciq,acc_norm,0.892,0.009820001651345694,0 storycloze_2016,acc,0.6419027258150721,0.011087006809925708,0 winogrande,acc,0.510655090765588,0.0140492945362904,0