task,metric,value,err,version anli_r1,acc,0.353,0.015120172605483699,0 anli_r2,acc,0.36,0.015186527932040127,0 anli_r3,acc,0.3641666666666667,0.013896714966807265,0 arc_challenge,acc,0.3003412969283277,0.013395909309956999,0 arc_challenge,acc_norm,0.3148464163822526,0.01357265770308495,0 arc_easy,acc,0.6233164983164983,0.00994284807747617,0 arc_easy,acc_norm,0.6077441077441077,0.010018744689650043,0 boolq,acc,0.6220183486238532,0.008480656964585248,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.4129474011826953,,1 copa,acc,0.78,0.041633319989322626,0 hellaswag,acc,0.466938856801434,0.004978861409119803,0 hellaswag,acc_norm,0.6222863971320454,0.004838246410786256,0 piqa,acc,0.7524483133841132,0.010069703966857106,0 piqa,acc_norm,0.7584330794341676,0.009986718001804453,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.904,0.009320454434783248,0 sciq,acc_norm,0.898,0.009575368801653897,0 storycloze_2016,acc,0.7300908605024051,0.01026541350322146,0 winogrande,acc,0.585635359116022,0.013844846232268563,0