task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811482,0 anli_r2,acc,0.324,0.014806864733738857,0 anli_r3,acc,0.3491666666666667,0.013767075395077247,0 arc_challenge,acc,0.27474402730375425,0.013044617212771227,0 arc_challenge,acc_norm,0.295221843003413,0.013329750293382316,0 arc_easy,acc,0.6018518518518519,0.010044662374653396,0 arc_easy,acc_norm,0.5286195286195287,0.010242962617927197,0 boolq,acc,0.6220183486238532,0.008480656964585246,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.36493558776167473,,1 copa,acc,0.77,0.042295258468165065,0 hellaswag,acc,0.4695279824736108,0.004980506329407586,0 hellaswag,acc_norm,0.6156144194383589,0.0048545552940175395,0 piqa,acc,0.750272034820457,0.010099232969867483,0 piqa,acc_norm,0.7600652883569097,0.009963625892809545,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.832,0.011828605831454267,0 sciq,acc_norm,0.751,0.013681600278702296,0 storycloze_2016,acc,0.7199358631747729,0.01038376499392048,0 winogrande,acc,0.5832675611681136,0.013856250072796316,0