task,metric,value,err,version anli_r1,acc,0.326,0.014830507204541047,0 anli_r2,acc,0.356,0.015149042659306628,0 anli_r3,acc,0.335,0.013630871843821474,0 arc_challenge,acc,0.2713310580204778,0.012993807727545797,0 arc_challenge,acc_norm,0.2960750853242321,0.013340916085246263,0 arc_easy,acc,0.6073232323232324,0.010020646555538689,0 arc_easy,acc_norm,0.5862794612794613,0.01010587853023813,0 boolq,acc,0.5954128440366973,0.008584355308932687,1 cb,acc,0.5178571428571429,0.06737697508644648,1 cb,f1,0.43332988160574365,,1 copa,acc,0.71,0.045604802157206845,0 hellaswag,acc,0.45498904600677154,0.004969521827957945,0 hellaswag,acc_norm,0.5934076877116112,0.004901936511546108,0 piqa,acc,0.7421109902067464,0.010206956662056257,0 piqa,acc_norm,0.749183895538629,0.010113869547069044,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.891,0.00985982840703719,0 sciq,acc_norm,0.869,0.010674874844837956,0 storycloze_2016,acc,0.6980224478888295,0.010616985436073357,0 winogrande,acc,0.5580110497237569,0.01395758407910899,0