task,metric,value,err,version anli_r1,acc,0.316,0.014709193056057107,0 anli_r2,acc,0.332,0.014899597242811478,0 anli_r3,acc,0.34,0.013680495725767803,0 arc_challenge,acc,0.34897610921501704,0.013928933461382497,0 arc_challenge,acc_norm,0.36860068259385664,0.014097810678042184,0 arc_easy,acc,0.6792929292929293,0.00957747457110883,0 arc_easy,acc_norm,0.6670875420875421,0.009669958978395326,0 boolq,acc,0.6516819571865443,0.008332942286688303,1 cb,acc,0.30357142857142855,0.06199938655510754,1 cb,f1,0.2236842105263158,,1 copa,acc,0.84,0.03684529491774709,0 hellaswag,acc,0.5338577972515435,0.004978328190775526,0 hellaswag,acc_norm,0.7099183429595698,0.004528723951878254,0 piqa,acc,0.7687704026115343,0.00983706318062533,0 piqa,acc_norm,0.7829162132752993,0.009618708415756785,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.934,0.007855297938697587,0 sciq,acc_norm,0.925,0.008333333333333326,0 storycloze_2016,acc,0.7600213789417424,0.009875938525582594,0 winogrande,acc,0.6369376479873717,0.013515191866479221,0