|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.014888272588203931,0
|
|
anli_r2,acc,0.329,0.014865395385928369,0
|
|
anli_r3,acc,0.3416666666666667,0.013696658778002512,0
|
|
arc_challenge,acc,0.2909556313993174,0.013273077865907578,0
|
|
arc_challenge,acc_norm,0.3165529010238908,0.013592431519068079,0
|
|
arc_easy,acc,0.6292087542087542,0.009911292822056925,0
|
|
arc_easy,acc_norm,0.6047979797979798,0.01003189405279098,0
|
|
boolq,acc,0.6073394495412844,0.008541161248702913,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.2951144094001237,,1
|
|
copa,acc,0.74,0.04408440022768079,0
|
|
hellaswag,acc,0.4667396932881896,0.0049787293000748915,0
|
|
hellaswag,acc_norm,0.6128261302529376,0.004861084534087031,0
|
|
piqa,acc,0.7464635473340587,0.01015009083455179,0
|
|
piqa,acc_norm,0.7557127312295974,0.01002476517228425,0
|
|
rte,acc,0.5306859205776173,0.030039730592197812,0
|
|
sciq,acc,0.904,0.009320454434783215,0
|
|
sciq,acc_norm,0.881,0.010244215145336666,0
|
|
storycloze_2016,acc,0.7194013896312133,0.01038980964728882,0
|
|
winogrande,acc,0.5895816890292028,0.013825107120035863,0
|
|
|