task,metric,value,err,version anli_r1,acc,0.352,0.015110404505648668,0 anli_r2,acc,0.327,0.014842213153411247,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.27474402730375425,0.013044617212771227,0 arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0 arc_easy,acc,0.6153198653198653,0.00998317170700901,0 arc_easy,acc_norm,0.5989057239057239,0.010057051106534385,0 boolq,acc,0.5960244648318043,0.008582268854021401,1 cb,acc,0.44642857142857145,0.06703189227942397,1 cb,f1,0.3134878193701723,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4457279426409082,0.004960299952519412,0 hellaswag,acc_norm,0.5867357100179247,0.00491413085543178,0 piqa,acc,0.7301414581066377,0.0103565954218522,0 piqa,acc_norm,0.7312295973884657,0.01034339294009,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.891,0.009859828407037191,0 sciq,acc_norm,0.863,0.010878848714333316,0 storycloze_2016,acc,0.7071084981293426,0.010523873293246309,0 winogrande,acc,0.5730071033938438,0.013901878072575055,0