task,metric,value,err,version anli_r1,acc,0.333,0.01491084616422986,0 anli_r2,acc,0.346,0.015050266127564448,0 anli_r3,acc,0.34833333333333333,0.013759437498874075,0 arc_challenge,acc,0.24573378839590443,0.012581033453730114,0 arc_challenge,acc_norm,0.2645051194539249,0.01288927294931337,0 arc_easy,acc,0.4823232323232323,0.01025336980569896,0 arc_easy,acc_norm,0.4553872053872054,0.010218861787618725,0 boolq,acc,0.4434250764525994,0.008688893661318225,1 cb,acc,0.44642857142857145,0.06703189227942397,1 cb,f1,0.39049382716049386,,1 copa,acc,0.62,0.04878317312145633,0 hellaswag,acc,0.3990240987851026,0.004886969266944277,0 hellaswag,acc_norm,0.4933280223063135,0.004989337148572078,0 piqa,acc,0.6817192600652884,0.010868093932082235,0 piqa,acc_norm,0.6817192600652884,0.010868093932082231,0 rte,acc,0.5342960288808665,0.030025579819366422,0 sciq,acc,0.743,0.013825416526895026,0 sciq,acc_norm,0.716,0.01426700906103131,0 storycloze_2016,acc,0.6493853554249065,0.011034317290463294,0 winogrande,acc,0.505130228887135,0.014051745961790516,0