task,metric,value,err,version anli_r1,acc,0.349,0.015080663991563097,0 anli_r2,acc,0.359,0.015177264224798596,0 anli_r3,acc,0.365,0.01390348598141358,0 arc_challenge,acc,0.23122866894197952,0.01232085883477228,0 arc_challenge,acc_norm,0.257679180887372,0.0127807705627684,0 arc_easy,acc,0.4890572390572391,0.010257326131172879,0 arc_easy,acc_norm,0.4701178451178451,0.01024144432288643,0 boolq,acc,0.5336391437308868,0.00872524086113114,1 cb,acc,0.5178571428571429,0.06737697508644648,1 cb,f1,0.327692720923986,,1 copa,acc,0.67,0.04725815626252607,0 hellaswag,acc,0.39404501095399325,0.004876459434619795,0 hellaswag,acc_norm,0.4907388966341366,0.0049889254105227685,0 piqa,acc,0.6920565832426551,0.010770892367463685,0 piqa,acc_norm,0.705658324265506,0.01063331147034751,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.756,0.013588548437881423,0 sciq,acc_norm,0.734,0.013979965645145143,0 storycloze_2016,acc,0.6445750935328701,0.011068528452399879,0 winogrande,acc,0.5098658247829518,0.014049749833367592,0