|
task,metric,value,err,version
|
|
anli_r1,acc,0.338,0.014965960710224482,0
|
|
anli_r2,acc,0.333,0.014910846164229859,0
|
|
anli_r3,acc,0.33,0.013579531277800923,0
|
|
arc_challenge,acc,0.2781569965870307,0.013094469919538809,0
|
|
arc_challenge,acc_norm,0.29692832764505117,0.013352025976725223,0
|
|
arc_easy,acc,0.6123737373737373,0.009997307914447612,0
|
|
arc_easy,acc_norm,0.5517676767676768,0.01020464512685695,0
|
|
boolq,acc,0.5363914373088685,0.008721861424877866,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.29078164450800714,,1
|
|
copa,acc,0.76,0.04292346959909282,0
|
|
hellaswag,acc,0.4658434574785899,0.004978124945759852,0
|
|
hellaswag,acc_norm,0.6105357498506274,0.004866322258335979,0
|
|
piqa,acc,0.7589771490750816,0.009979042717267314,0
|
|
piqa,acc_norm,0.7665941240478781,0.009869247889520986,0
|
|
rte,acc,0.5306859205776173,0.03003973059219781,0
|
|
sciq,acc,0.831,0.011856625977890129,0
|
|
sciq,acc_norm,0.746,0.013772206565168537,0
|
|
storycloze_2016,acc,0.7081774452164618,0.01051258861619963,0
|
|
winogrande,acc,0.6108918705603789,0.013702520871485949,0
|
|
|