task,metric,value,err,version anli_r1,acc,0.312,0.014658474370509007,0 anli_r2,acc,0.342,0.01500870618212173,0 anli_r3,acc,0.32166666666666666,0.013490095282989521,0 arc_challenge,acc,0.2235494880546075,0.012174896631202607,0 arc_challenge,acc_norm,0.26023890784982934,0.012821930225112556,0 arc_easy,acc,0.5475589225589226,0.01021326586017139,0 arc_easy,acc_norm,0.5256734006734006,0.010246249665591229,0 boolq,acc,0.5256880733944954,0.008733506027183658,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.3971014492753624,,1 copa,acc,0.72,0.04512608598542127,0 hellaswag,acc,0.3690499900418243,0.0048156131443854,0 hellaswag,acc_norm,0.4477195777733519,0.004962429881904027,0 piqa,acc,0.7034820457018498,0.010656078922661153,0 piqa,acc_norm,0.7029379760609358,0.010661725404814778,0 rte,acc,0.47653429602888087,0.03006330041190266,0 sciq,acc,0.89,0.009899393819724444,0 sciq,acc_norm,0.889,0.009938701010583726,0 storycloze_2016,acc,0.6413682522715125,0.011090657465688191,0 winogrande,acc,0.5438042620363063,0.013998453610924324,0