|
task,metric,value,err,version
|
|
anli_r1,acc,0.319,0.014746404865473474,0
|
|
anli_r2,acc,0.343,0.015019206922356953,0
|
|
anli_r3,acc,0.3416666666666667,0.013696658778002519,0
|
|
arc_challenge,acc,0.29180887372013653,0.013284525292403503,0
|
|
arc_challenge,acc_norm,0.3174061433447099,0.01360223908803817,0
|
|
arc_easy,acc,0.6452020202020202,0.009817629113069696,0
|
|
arc_easy,acc_norm,0.6363636363636364,0.009870849346011758,0
|
|
boolq,acc,0.6107033639143731,0.00852801629098454,1
|
|
cb,acc,0.5357142857142857,0.06724777654937658,1
|
|
cb,f1,0.3757011576560449,,1
|
|
copa,acc,0.76,0.04292346959909282,0
|
|
hellaswag,acc,0.4500099581756622,0.004964779805180658,0
|
|
hellaswag,acc_norm,0.5928101971718781,0.0049030666397619485,0
|
|
piqa,acc,0.7399347116430903,0.010234893249061303,0
|
|
piqa,acc_norm,0.7513601741022851,0.01008451123429685,0
|
|
rte,acc,0.51985559566787,0.030072723167317177,0
|
|
sciq,acc,0.923,0.008434580140240643,0
|
|
sciq,acc_norm,0.915,0.00882342636694233,0
|
|
storycloze_2016,acc,0.6990913949759487,0.010606289538707334,0
|
|
winogrande,acc,0.5737963693764798,0.013898585965412338,0
|
|
|