task,metric,value,err,version anli_r1,acc,0.352,0.015110404505648677,0 anli_r2,acc,0.343,0.015019206922356953,0 anli_r3,acc,0.3425,0.013704669762934722,0 arc_challenge,acc,0.2696245733788396,0.01296804068686915,0 arc_challenge,acc_norm,0.2986348122866894,0.013374078615068756,0 arc_easy,acc,0.5913299663299664,0.010087174498762883,0 arc_easy,acc_norm,0.5627104377104377,0.010178768429321595,0 boolq,acc,0.5825688073394495,0.00862499005021668,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.30043290043290044,,1 copa,acc,0.76,0.04292346959909282,0 hellaswag,acc,0.4446325433180641,0.004959094146471523,0 hellaswag,acc_norm,0.589523999203346,0.004909148239488278,0 piqa,acc,0.7285092491838956,0.010376251176596135,0 piqa,acc_norm,0.7399347116430903,0.010234893249061287,0 rte,acc,0.51985559566787,0.030072723167317177,0 sciq,acc,0.874,0.010499249222408046,0 sciq,acc_norm,0.843,0.01151014697923019,0 storycloze_2016,acc,0.6953500801710315,0.010643426988646806,0 winogrande,acc,0.56353591160221,0.01393856946567702,0