|
task,metric,value,err,version
|
|
anli_r1,acc,0.317,0.01472167543888022,0
|
|
anli_r2,acc,0.328,0.014853842487270334,0
|
|
anli_r3,acc,0.33166666666666667,0.01359683672948517,0
|
|
arc_challenge,acc,0.3310580204778157,0.013752062419817836,0
|
|
arc_challenge,acc_norm,0.3583617747440273,0.014012883334859871,0
|
|
arc_easy,acc,0.680976430976431,0.009564133249441073,0
|
|
arc_easy,acc_norm,0.6616161616161617,0.009709034670525096,0
|
|
boolq,acc,0.6626911314984709,0.008269171495741617,1
|
|
cb,acc,0.19642857142857142,0.05357142857142859,1
|
|
cb,f1,0.1984379958880104,,1
|
|
copa,acc,0.82,0.03861229196653697,0
|
|
hellaswag,acc,0.530372435769767,0.004980566907790449,0
|
|
hellaswag,acc_norm,0.7078271260705039,0.004538319464111969,0
|
|
piqa,acc,0.7725788900979326,0.009779850767847239,0
|
|
piqa,acc_norm,0.7812840043525572,0.009644731932667563,0
|
|
rte,acc,0.5884476534296029,0.0296218322224172,0
|
|
sciq,acc,0.945,0.007212976294639238,0
|
|
sciq,acc_norm,0.934,0.007855297938697587,0
|
|
storycloze_2016,acc,0.757883484767504,0.009905870033193863,0
|
|
winogrande,acc,0.6298342541436464,0.013570454689603911,0
|
|
|