task,metric,value,err,version anli_r1,acc,0.306,0.014580006055436967,0 anli_r2,acc,0.333,0.014910846164229863,0 anli_r3,acc,0.31916666666666665,0.013462309712005143,0 arc_challenge,acc,0.3003412969283277,0.013395909309957,0 arc_challenge,acc_norm,0.3319112627986348,0.013760988200880536,0 arc_easy,acc,0.6279461279461279,0.009918187193096471,0 arc_easy,acc_norm,0.6069023569023569,0.010022540618945315,0 boolq,acc,0.6165137614678899,0.008504304838837027,1 cb,acc,0.17857142857142858,0.051642771820087224,1 cb,f1,0.16652752931822698,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.4774945230033858,0.00498472423511512,0 hellaswag,acc_norm,0.6274646484763992,0.004824917516374197,0 piqa,acc,0.7535364526659413,0.01005481078967182,0 piqa,acc_norm,0.7633297062023939,0.009916841655042809,0 rte,acc,0.4981949458483754,0.030096267148976633,0 sciq,acc,0.911,0.009008893392651523,0 sciq,acc_norm,0.891,0.00985982840703719,0 storycloze_2016,acc,0.7156600748262961,0.010431614128665244,0 winogrande,acc,0.6029992107340174,0.013751092519806704,0