task,metric,value,err,version anli_r1,acc,0.361,0.015195720118175124,0 anli_r2,acc,0.332,0.014899597242811475,0 anli_r3,acc,0.3441666666666667,0.013720551062295755,0 arc_challenge,acc,0.30631399317406144,0.013470584417276513,0 arc_challenge,acc_norm,0.32337883959044367,0.013669421630012122,0 arc_easy,acc,0.6245791245791246,0.0099362185271143,0 arc_easy,acc_norm,0.6199494949494949,0.009960175831493131,0 boolq,acc,0.6152905198776758,0.008509403073229692,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.3336203597397627,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.4676359290977893,0.004979317515432522,0 hellaswag,acc_norm,0.6258713403704441,0.004829081532826523,0 piqa,acc,0.7529923830250272,0.010062268140772629,0 piqa,acc_norm,0.7540805223068553,0.010047331865625184,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.913,0.008916866630745913,0 sciq,acc_norm,0.904,0.009320454434783217,0 storycloze_2016,acc,0.721004810261892,0.010371620932652793,0 winogrande,acc,0.5816890292028414,0.013863669961195908,0