task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.375,0.015316971293620996,0 anli_r3,acc,0.3358333333333333,0.013639261190932882,0 arc_challenge,acc,0.23293515358361774,0.012352507042617394,0 arc_challenge,acc_norm,0.26023890784982934,0.012821930225112556,0 arc_easy,acc,0.4718013468013468,0.010243454104071792,0 arc_easy,acc_norm,0.4553872053872054,0.010218861787618732,0 boolq,acc,0.4504587155963303,0.008702022442950874,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.538474366304555,,1 copa,acc,0.68,0.046882617226215034,0 hellaswag,acc,0.3951404102768373,0.004878816961012042,0 hellaswag,acc_norm,0.49372634933280224,0.0049893886134388,0 piqa,acc,0.6920565832426551,0.010770892367463689,0 piqa,acc_norm,0.6985854189336235,0.01070624824275376,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.728,0.014078856992462615,0 sciq,acc_norm,0.711,0.014341711358296183,0 storycloze_2016,acc,0.6531266702298236,0.011006857922124124,0 winogrande,acc,0.5343330702446725,0.014019317531542569,0