|
task,metric,value,err,version
|
|
anli_r1,acc,0.314,0.014683991951087966,0
|
|
anli_r2,acc,0.326,0.014830507204541035,0
|
|
anli_r3,acc,0.355,0.013819249004047296,0
|
|
arc_challenge,acc,0.2090443686006826,0.01188274698740645,0
|
|
arc_challenge,acc_norm,0.25170648464163825,0.012682496334042968,0
|
|
arc_easy,acc,0.39057239057239057,0.010011059112064236,0
|
|
arc_easy,acc_norm,0.36658249158249157,0.009887786585323946,0
|
|
boolq,acc,0.5571865443425077,0.008687668766930832,1
|
|
cb,acc,0.32142857142857145,0.06297362289056341,1
|
|
cb,f1,0.28889599317988063,,1
|
|
copa,acc,0.73,0.044619604333847394,0
|
|
hellaswag,acc,0.4563831905994822,0.004970759774676886,0
|
|
hellaswag,acc_norm,0.5928101971718781,0.004903066639761947,0
|
|
piqa,acc,0.6322089227421109,0.011250616646678795,0
|
|
piqa,acc_norm,0.6311207834602829,0.011257546676908809,0
|
|
rte,acc,0.5956678700361011,0.029540420517619716,0
|
|
sciq,acc,0.703,0.014456832294801098,0
|
|
sciq,acc_norm,0.647,0.015120172605483697,0
|
|
storycloze_2016,acc,0.6520577231427044,0.011014779784784828,0
|
|
winogrande,acc,0.5619573796369376,0.013944181296470804,0
|
|
|