|
task,metric,value,err,version
|
|
anli_r1,acc,0.337,0.014955087918653603,0
|
|
anli_r2,acc,0.343,0.015019206922356953,0
|
|
anli_r3,acc,0.3425,0.013704669762934728,0
|
|
arc_challenge,acc,0.29436860068259385,0.013318528460539422,0
|
|
arc_challenge,acc_norm,0.3046075085324232,0.01344952210993249,0
|
|
arc_easy,acc,0.6102693602693603,0.010007169391797053,0
|
|
arc_easy,acc_norm,0.5315656565656566,0.010239317603199509,0
|
|
boolq,acc,0.5525993883792049,0.008696530539281539,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.19555555555555557,,1
|
|
copa,acc,0.78,0.04163331998932261,0
|
|
hellaswag,acc,0.47868950408285205,0.00498524726030409,0
|
|
hellaswag,acc_norm,0.6269667396932882,0.004826224784850451,0
|
|
piqa,acc,0.7578890097932536,0.009994371269104387,0
|
|
piqa,acc_norm,0.7611534276387377,0.009948120385337484,0
|
|
rte,acc,0.5090252707581228,0.030091559826331334,0
|
|
sciq,acc,0.851,0.011266140684632168,0
|
|
sciq,acc_norm,0.76,0.013512312258920831,0
|
|
storycloze_2016,acc,0.7177979690005345,0.010407834479647672,0
|
|
winogrande,acc,0.585635359116022,0.013844846232268563,0
|
|
|