|
task,metric,value,err,version
|
|
anli_r1,acc,0.326,0.014830507204541044,0
|
|
anli_r2,acc,0.344,0.015029633724408947,0
|
|
anli_r3,acc,0.35583333333333333,0.013826518748493315,0
|
|
arc_challenge,acc,0.31143344709897613,0.013532472099850949,0
|
|
arc_challenge,acc_norm,0.32764505119453924,0.013715847940719344,0
|
|
arc_easy,acc,0.6418350168350169,0.009838331651451844,0
|
|
arc_easy,acc_norm,0.6300505050505051,0.009906656266021155,0
|
|
boolq,acc,0.5574923547400612,0.008687051315181374,1
|
|
cb,acc,0.5357142857142857,0.06724777654937658,1
|
|
cb,f1,0.35670453061757407,,1
|
|
copa,acc,0.74,0.04408440022768078,0
|
|
hellaswag,acc,0.45140410276837284,0.004966158142645416,0
|
|
hellaswag,acc_norm,0.5934076877116112,0.004901936511546116,0
|
|
piqa,acc,0.7388465723612623,0.010248738649935581,0
|
|
piqa,acc_norm,0.750272034820457,0.010099232969867483,0
|
|
rte,acc,0.5415162454873647,0.02999253538537331,0
|
|
sciq,acc,0.913,0.008916866630745925,0
|
|
sciq,acc_norm,0.911,0.009008893392651516,0
|
|
storycloze_2016,acc,0.6996258685195083,0.010600915927985026,0
|
|
winogrande,acc,0.5698500394632992,0.013914685094716692,0
|
|
|