task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363935,0 anli_r2,acc,0.347,0.015060472031706617,0 anli_r3,acc,0.3325,0.01360541734571053,0 arc_challenge,acc,0.30119453924914674,0.01340674176784762,0 arc_challenge,acc_norm,0.32337883959044367,0.013669421630012123,0 arc_easy,acc,0.6477272727272727,0.009801753933112778,0 arc_easy,acc_norm,0.6199494949494949,0.009960175831493124,0 boolq,acc,0.6214067278287462,0.00848334171802448,1 cb,acc,0.32142857142857145,0.06297362289056341,1 cb,f1,0.1621621621621622,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.48008364867556264,0.0049858213361464,0 hellaswag,acc_norm,0.6368253335988847,0.00479931720990201,0 piqa,acc,0.7524483133841132,0.01006970396685711,0 piqa,acc_norm,0.7747551686615887,0.009746643471032136,0 rte,acc,0.5595667870036101,0.029882123363118716,0 sciq,acc,0.918,0.008680515615523722,0 sciq,acc_norm,0.914,0.008870325962594766,0 storycloze_2016,acc,0.7279529663281668,0.010290888060871242,0 winogrande,acc,0.6179952644041041,0.013655578215970424,0