task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224482,0 anli_r2,acc,0.371,0.015283736211823188,0 anli_r3,acc,0.3416666666666667,0.013696658778002514,0 arc_challenge,acc,0.23976109215017063,0.012476304127453946,0 arc_challenge,acc_norm,0.26706484641638223,0.012928933196496352,0 arc_easy,acc,0.49284511784511786,0.01025873302244637,0 arc_easy,acc_norm,0.47685185185185186,0.010248782484554474,0 boolq,acc,0.5137614678899083,0.008741742106878655,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3419913419913419,,1 copa,acc,0.65,0.047937248544110196,0 hellaswag,acc,0.3921529575781717,0.004872326888655522,0 hellaswag,acc_norm,0.4898426608245369,0.004988751698341149,0 piqa,acc,0.705658324265506,0.0106333114703475,0 piqa,acc_norm,0.7040261153427638,0.010650414317148131,0 rte,acc,0.5776173285198556,0.029731622646495887,0 sciq,acc,0.749,0.013718133516888923,0 sciq,acc_norm,0.74,0.013877773329774166,0 storycloze_2016,acc,0.6467129877071085,0.011053474766125627,0 winogrande,acc,0.5209155485398579,0.014040185494212949,0