task,metric,value,err,version anli_r1,acc,0.314,0.014683991951087966,0 anli_r2,acc,0.326,0.014830507204541035,0 anli_r3,acc,0.355,0.013819249004047296,0 arc_challenge,acc,0.2090443686006826,0.01188274698740645,0 arc_challenge,acc_norm,0.25170648464163825,0.012682496334042968,0 arc_easy,acc,0.39057239057239057,0.010011059112064236,0 arc_easy,acc_norm,0.36658249158249157,0.009887786585323946,0 boolq,acc,0.5571865443425077,0.008687668766930832,1 cb,acc,0.32142857142857145,0.06297362289056341,1 cb,f1,0.28889599317988063,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.4563831905994822,0.004970759774676886,0 hellaswag,acc_norm,0.5928101971718781,0.004903066639761947,0 piqa,acc,0.6322089227421109,0.011250616646678795,0 piqa,acc_norm,0.6311207834602829,0.011257546676908809,0 rte,acc,0.5956678700361011,0.029540420517619716,0 sciq,acc,0.703,0.014456832294801098,0 sciq,acc_norm,0.647,0.015120172605483697,0 storycloze_2016,acc,0.6520577231427044,0.011014779784784828,0 winogrande,acc,0.5619573796369376,0.013944181296470804,0