task,metric,value,err,version anli_r1,acc,0.312,0.014658474370509005,0 anli_r2,acc,0.332,0.014899597242811485,0 anli_r3,acc,0.3258333333333333,0.013535422043417464,0 arc_challenge,acc,0.2935153583617747,0.013307250444941127,0 arc_challenge,acc_norm,0.3148464163822526,0.01357265770308495,0 arc_easy,acc,0.6346801346801347,0.009880576614806924,0 arc_easy,acc_norm,0.6292087542087542,0.009911292822056918,0 boolq,acc,0.637308868501529,0.008408838061823179,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.26842105263157895,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4759012148974308,0.004983982396187366,0 hellaswag,acc_norm,0.6352320254929297,0.0048038126319949696,0 piqa,acc,0.763873775843308,0.009908965890558211,0 piqa,acc_norm,0.763873775843308,0.009908965890558218,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.914,0.008870325962594766,0 storycloze_2016,acc,0.7231427044361304,0.010347112890276924,0 winogrande,acc,0.5974743488555643,0.01378286683170305,0