|
task,metric,value,err,version
|
|
anli_r1,acc,0.326,0.014830507204541047,0
|
|
anli_r2,acc,0.356,0.015149042659306628,0
|
|
anli_r3,acc,0.335,0.013630871843821474,0
|
|
arc_challenge,acc,0.2713310580204778,0.012993807727545797,0
|
|
arc_challenge,acc_norm,0.2960750853242321,0.013340916085246263,0
|
|
arc_easy,acc,0.6073232323232324,0.010020646555538689,0
|
|
arc_easy,acc_norm,0.5862794612794613,0.01010587853023813,0
|
|
boolq,acc,0.5954128440366973,0.008584355308932687,1
|
|
cb,acc,0.5178571428571429,0.06737697508644648,1
|
|
cb,f1,0.43332988160574365,,1
|
|
copa,acc,0.71,0.045604802157206845,0
|
|
hellaswag,acc,0.45498904600677154,0.004969521827957945,0
|
|
hellaswag,acc_norm,0.5934076877116112,0.004901936511546108,0
|
|
piqa,acc,0.7421109902067464,0.010206956662056257,0
|
|
piqa,acc_norm,0.749183895538629,0.010113869547069044,0
|
|
rte,acc,0.48375451263537905,0.030080573208738064,0
|
|
sciq,acc,0.891,0.00985982840703719,0
|
|
sciq,acc_norm,0.869,0.010674874844837956,0
|
|
storycloze_2016,acc,0.6980224478888295,0.010616985436073357,0
|
|
winogrande,acc,0.5580110497237569,0.01395758407910899,0
|
|
|