|
task,metric,value,err,version
|
|
anli_r1,acc,0.349,0.015080663991563098,0
|
|
anli_r2,acc,0.345,0.015039986742055242,0
|
|
anli_r3,acc,0.3466666666666667,0.013744022550571956,0
|
|
arc_challenge,acc,0.3395904436860068,0.01383903976282016,0
|
|
arc_challenge,acc_norm,0.3660409556313993,0.014077223108470142,0
|
|
arc_easy,acc,0.6839225589225589,0.009540440071928283,0
|
|
arc_easy,acc_norm,0.6683501683501684,0.009660733780923948,0
|
|
boolq,acc,0.6590214067278287,0.00829097981816109,1
|
|
cb,acc,0.48214285714285715,0.0673769750864465,1
|
|
cb,f1,0.43206548866926225,,1
|
|
copa,acc,0.84,0.03684529491774709,0
|
|
hellaswag,acc,0.5317665803624776,0.004979700695747948,0
|
|
hellaswag,acc_norm,0.7076279625572595,0.004539227260397019,0
|
|
piqa,acc,0.7905331882480957,0.009494302979819794,0
|
|
piqa,acc_norm,0.7927094668117519,0.009457844699952372,0
|
|
rte,acc,0.51985559566787,0.030072723167317177,0
|
|
sciq,acc,0.935,0.007799733061832017,0
|
|
sciq,acc_norm,0.929,0.008125578442487916,0
|
|
storycloze_2016,acc,0.7573490112239444,0.009913300265342056,0
|
|
winogrande,acc,0.6432517758484609,0.013463393958028726,0
|
|
|