task,metric,value,err,version anli_r1,acc,0.343,0.015019206922356951,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.33666666666666667,0.013647602942406393,0 arc_challenge,acc,0.30887372013651876,0.013501770929344003,0 arc_challenge,acc_norm,0.3165529010238908,0.01359243151906808,0 arc_easy,acc,0.617003367003367,0.009974920384536462,0 arc_easy,acc_norm,0.5761784511784511,0.01014000609521361,0 boolq,acc,0.617125382262997,0.008501734385335953,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.39080213903743316,,1 copa,acc,0.74,0.04408440022768079,0 hellaswag,acc,0.47759410476000796,0.004984768912326932,0 hellaswag,acc_norm,0.6294562836088429,0.004819633668832537,0 piqa,acc,0.750272034820457,0.010099232969867492,0 piqa,acc_norm,0.7584330794341676,0.009986718001804461,0 rte,acc,0.5523465703971119,0.029931070362939533,0 sciq,acc,0.906,0.009233052000787726,0 sciq,acc_norm,0.885,0.010093407594904628,0 storycloze_2016,acc,0.706574024585783,0.010529489334744471,0 winogrande,acc,0.5730071033938438,0.013901878072575057,0