task,metric,value,err,version anli_r1,acc,0.317,0.014721675438880241,0 anli_r2,acc,0.358,0.015167928865407559,0 anli_r3,acc,0.32166666666666666,0.013490095282989521,0 arc_challenge,acc,0.28498293515358364,0.013191348179838793,0 arc_challenge,acc_norm,0.3293515358361775,0.013734057652635474,0 arc_easy,acc,0.6418350168350169,0.009838331651451841,0 arc_easy,acc_norm,0.6296296296296297,0.009908978578665753,0 boolq,acc,0.6204892966360857,0.008487341975756834,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.3558162267839687,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.44911372236606256,0.0049638729368579396,0 hellaswag,acc_norm,0.5975901214897431,0.004893814890208308,0 piqa,acc,0.7475516866158868,0.010135665547362362,0 piqa,acc_norm,0.7453754080522307,0.010164432237060494,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.918,0.008680515615523727,0 sciq,acc_norm,0.919,0.00863212103213998,0 storycloze_2016,acc,0.6996258685195083,0.010600915927985021,0 winogrande,acc,0.5651144435674822,0.013932814110418025,0