task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928355,0 anli_r2,acc,0.341,0.014998131348402704,0 anli_r3,acc,0.325,0.013526454480351028,0 arc_challenge,acc,0.29948805460750855,0.013385021637313565,0 arc_challenge,acc_norm,0.33276450511945393,0.01376986304619231,0 arc_easy,acc,0.6380471380471381,0.009860991466688486,0 arc_easy,acc_norm,0.625,0.009933992677987828,0 boolq,acc,0.6146788990825688,0.008511930879680645,1 cb,acc,0.25,0.058387420812114225,1 cb,f1,0.24860681114551084,,1 copa,acc,0.84,0.03684529491774711,0 hellaswag,acc,0.4788886675960964,0.004985331652408345,0 hellaswag,acc_norm,0.6285600477992431,0.004822022254886021,0 piqa,acc,0.7584330794341676,0.009986718001804463,0 piqa,acc_norm,0.7562568008705114,0.010017199471500609,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.916,0.008776162089491122,0 sciq,acc_norm,0.9,0.009491579957525049,0 storycloze_2016,acc,0.7172634954569749,0.01041380648612127,0 winogrande,acc,0.590370955011839,0.013821049109655465,0