task,metric,value,err,version anli_r1,acc,0.347,0.015060472031706613,0 anli_r2,acc,0.328,0.01485384248727033,0 anli_r3,acc,0.3475,0.013751753243291856,0 arc_challenge,acc,0.2764505119453925,0.013069662474252427,0 arc_challenge,acc_norm,0.28754266211604096,0.013226719056266132,0 arc_easy,acc,0.6077441077441077,0.010018744689650043,0 arc_easy,acc_norm,0.5833333333333334,0.01011628297778126,0 boolq,acc,0.6064220183486239,0.00854467241848691,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.3578242744909412,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4539932284405497,0.00496861353930925,0 hellaswag,acc_norm,0.5944035052778331,0.0049000362613090395,0 piqa,acc,0.73449401523395,0.010303308653024429,0 piqa,acc_norm,0.7442872687704026,0.010178690109459878,0 rte,acc,0.48736462093862815,0.030086851767188564,0 sciq,acc,0.899,0.009533618929340983,0 sciq,acc_norm,0.886,0.010055103435823332,0 storycloze_2016,acc,0.6985569214323891,0.010611646032767584,0 winogrande,acc,0.5643251775848461,0.013935709739615713,0