task,metric,value,err,version anli_r1,acc,0.344,0.015029633724408943,0 anli_r2,acc,0.326,0.01483050720454104,0 anli_r3,acc,0.36083333333333334,0.01386918025244486,0 arc_challenge,acc,0.30631399317406144,0.013470584417276511,0 arc_challenge,acc_norm,0.3225255972696246,0.013659980894277373,0 arc_easy,acc,0.6334175084175084,0.009887786585323946,0 arc_easy,acc_norm,0.617003367003367,0.009974920384536482,0 boolq,acc,0.617737003058104,0.008499149690449273,1 cb,acc,0.6428571428571429,0.06460957383809221,1 cb,f1,0.4430260047281324,,1 copa,acc,0.76,0.04292346959909282,0 hellaswag,acc,0.466938856801434,0.004978861409119807,0 hellaswag,acc_norm,0.6233817964548894,0.0048354759576109425,0 piqa,acc,0.7448313384113167,0.010171571592521822,0 piqa,acc_norm,0.7616974972796517,0.009940334245876219,0 rte,acc,0.5667870036101083,0.02982676408213827,0 sciq,acc,0.909,0.009099549538400243,0 sciq,acc_norm,0.903,0.009363689373248125,0 storycloze_2016,acc,0.7284874398717264,0.010284547617192592,0 winogrande,acc,0.6029992107340174,0.0137510925198067,0