task,metric,value,err,version anli_r1,acc,0.304,0.014553205687950436,0 anli_r2,acc,0.319,0.01474640486547348,0 anli_r3,acc,0.335,0.013630871843821465,0 arc_challenge,acc,0.3412969283276451,0.013855831287497719,0 arc_challenge,acc_norm,0.36006825938566556,0.014027516814585186,0 arc_easy,acc,0.6898148148148148,0.009491721291998517,0 arc_easy,acc_norm,0.6734006734006734,0.00962304703826765,0 boolq,acc,0.637308868501529,0.008408838061823179,1 cb,acc,0.35714285714285715,0.06460957383809221,1 cb,f1,0.20317460317460315,,1 copa,acc,0.84,0.03684529491774709,0 hellaswag,acc,0.5318661621190998,0.004979637330230314,0 hellaswag,acc_norm,0.7107149970125473,0.004525037849178835,0 piqa,acc,0.7763873775843307,0.009721489519176299,0 piqa,acc_norm,0.7850924918389554,0.009583665082653308,0 rte,acc,0.5776173285198556,0.02973162264649588,0 sciq,acc,0.928,0.008178195576218681,0 sciq,acc_norm,0.921,0.008534156773333463,0 storycloze_2016,acc,0.757883484767504,0.009905870033193874,0 winogrande,acc,0.6187845303867403,0.01365017216416031,0