task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224473,0 anli_r2,acc,0.328,0.014853842487270333,0 anli_r3,acc,0.3425,0.013704669762934728,0 arc_challenge,acc,0.2815699658703072,0.01314337673500901,0 arc_challenge,acc_norm,0.3242320819112628,0.01367881039951882,0 arc_easy,acc,0.5984848484848485,0.010058790020755572,0 arc_easy,acc_norm,0.571969696969697,0.01015294331642626,0 boolq,acc,0.6100917431192661,0.00853043797286262,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.26271604938271603,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.46863174666401114,0.0049799521665955405,0 hellaswag,acc_norm,0.6181039633539136,0.004848583243606704,0 piqa,acc,0.7393906420021763,0.010241826155811627,0 piqa,acc_norm,0.7383025027203483,0.010255630772708229,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.864,0.010845350230472995,0 sciq,acc_norm,0.862,0.01091215263250441,0 storycloze_2016,acc,0.6958845537145911,0.010638172655194789,0 winogrande,acc,0.5643251775848461,0.013935709739615715,0