task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811473,0 anli_r2,acc,0.332,0.01489959724281148,0 anli_r3,acc,0.3258333333333333,0.013535422043417462,0 arc_challenge,acc,0.2167235494880546,0.012040156713481189,0 arc_challenge,acc_norm,0.2593856655290102,0.012808273573927097,0 arc_easy,acc,0.5429292929292929,0.01022189756425605,0 arc_easy,acc_norm,0.5315656565656566,0.010239317603199512,0 boolq,acc,0.5559633027522936,0.00869010521492079,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3268398268398269,,1 copa,acc,0.64,0.04824181513244218,0 hellaswag,acc,0.3599880501892053,0.004790155370993451,0 hellaswag,acc_norm,0.44911372236606256,0.004963872936857939,0 piqa,acc,0.6985854189336235,0.01070624824275376,0 piqa,acc_norm,0.6969532100108814,0.010722648689531501,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.881,0.010244215145336662,0 sciq,acc_norm,0.877,0.010391293421849879,0 storycloze_2016,acc,0.6264029930518439,0.011186849693644696,0 winogrande,acc,0.5240726124704025,0.014036189665395134,0