task,metric,value,err,version anli_r1,acc,0.346,0.015050266127564434,0 anli_r2,acc,0.363,0.015213890444671281,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.29948805460750855,0.013385021637313576,0 arc_challenge,acc_norm,0.3250853242320819,0.013688147309729122,0 arc_easy,acc,0.6397306397306397,0.009851002584732383,0 arc_easy,acc_norm,0.627104377104377,0.009922743197129241,0 boolq,acc,0.5740061162079511,0.008648732832949143,1 cb,acc,0.3392857142857143,0.06384226561930827,1 cb,f1,0.3177045177045177,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.47938657637920734,0.004985539159783413,0 hellaswag,acc_norm,0.633240390360486,0.004809352075008956,0 piqa,acc,0.7535364526659413,0.010054810789671822,0 piqa,acc_norm,0.7704026115342764,0.009812682950815183,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.906,0.009233052000787733,0 storycloze_2016,acc,0.729021913415286,0.010278188399635044,0 winogrande,acc,0.6029992107340174,0.0137510925198067,0