|
task,metric,value,err,version
|
|
anli_r1,acc,0.333,0.01491084616422986,0
|
|
anli_r2,acc,0.332,0.014899597242811478,0
|
|
anli_r3,acc,0.33916666666666667,0.013672343491681822,0
|
|
arc_challenge,acc,0.35665529010238906,0.013998056902620203,0
|
|
arc_challenge,acc_norm,0.37627986348122866,0.014157022555407173,0
|
|
arc_easy,acc,0.7028619528619529,0.009377397867796849,0
|
|
arc_easy,acc_norm,0.6771885521885522,0.009593950220366737,0
|
|
boolq,acc,0.6486238532110091,0.00834978197660316,1
|
|
cb,acc,0.14285714285714285,0.04718416136255829,1
|
|
cb,f1,0.14017094017094014,,1
|
|
copa,acc,0.86,0.034873508801977725,0
|
|
hellaswag,acc,0.5324636526588329,0.004979252954977319,0
|
|
hellaswag,acc_norm,0.7127066321449911,0.004515748192605716,0
|
|
piqa,acc,0.764961915125136,0.00989314668880531,0
|
|
piqa,acc_norm,0.7840043525571273,0.009601236303553544,0
|
|
rte,acc,0.4981949458483754,0.030096267148976626,0
|
|
sciq,acc,0.938,0.007629823996280306,0
|
|
sciq,acc_norm,0.93,0.008072494358323508,0
|
|
storycloze_2016,acc,0.7546766435061465,0.009950137914623096,0
|
|
winogrande,acc,0.6195737963693765,0.013644727908656833,0
|
|
|