|
task,metric,value,err,version
|
|
anli_r1,acc,0.332,0.014899597242811487,0
|
|
anli_r2,acc,0.337,0.014955087918653605,0
|
|
anli_r3,acc,0.3433333333333333,0.01371263383046586,0
|
|
arc_challenge,acc,0.2764505119453925,0.013069662474252425,0
|
|
arc_challenge,acc_norm,0.2960750853242321,0.013340916085246258,0
|
|
arc_easy,acc,0.5963804713804713,0.01006736896034822,0
|
|
arc_easy,acc_norm,0.5382996632996633,0.010229639820610512,0
|
|
boolq,acc,0.6296636085626911,0.008445882436783665,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.3312277706643904,,1
|
|
copa,acc,0.73,0.0446196043338474,0
|
|
hellaswag,acc,0.4765982871937861,0.004984313205791442,0
|
|
hellaswag,acc_norm,0.6216889065923122,0.004839746491523515,0
|
|
piqa,acc,0.750816104461371,0.010091882770120216,0
|
|
piqa,acc_norm,0.7589771490750816,0.009979042717267312,0
|
|
rte,acc,0.5740072202166066,0.02976495674177765,0
|
|
sciq,acc,0.853,0.011203415395160336,0
|
|
sciq,acc_norm,0.762,0.013473586661967222,0
|
|
storycloze_2016,acc,0.7194013896312133,0.010389809647288816,0
|
|
winogrande,acc,0.5714285714285714,0.013908353814606696,0
|
|
|