|
task,metric,value,err,version
|
|
anli_r1,acc,0.336,0.014944140233795023,0
|
|
anli_r2,acc,0.326,0.014830507204541037,0
|
|
anli_r3,acc,0.33,0.013579531277800922,0
|
|
arc_challenge,acc,0.2721843003412969,0.013006600406423707,0
|
|
arc_challenge,acc_norm,0.29436860068259385,0.013318528460539422,0
|
|
arc_easy,acc,0.5854377104377104,0.010108889212447769,0
|
|
arc_easy,acc_norm,0.5723905723905723,0.010151683397430677,0
|
|
boolq,acc,0.57217125382263,0.008653474894637182,1
|
|
cb,acc,0.25,0.058387420812114225,1
|
|
cb,f1,0.2095321637426901,,1
|
|
copa,acc,0.72,0.045126085985421276,0
|
|
hellaswag,acc,0.4051981676956781,0.004899270310557984,0
|
|
hellaswag,acc_norm,0.5231029675363473,0.004984452002563928,0
|
|
piqa,acc,0.721436343852013,0.010459397235965182,0
|
|
piqa,acc_norm,0.719260065288357,0.010484325438311827,0
|
|
rte,acc,0.49097472924187724,0.030091559826331334,0
|
|
sciq,acc,0.891,0.009859828407037188,0
|
|
sciq,acc_norm,0.883,0.010169287802713327,0
|
|
storycloze_2016,acc,0.6632816675574559,0.010928525619392455,0
|
|
winogrande,acc,0.5469613259668509,0.013990366632148104,0
|
|
|