task,metric,value,err,version anli_r1,acc,0.343,0.015019206922356951,0 anli_r2,acc,0.329,0.014865395385928364,0 anli_r3,acc,0.3333333333333333,0.0136139500102256,0 arc_challenge,acc,0.2551194539249147,0.012739038695202105,0 arc_challenge,acc_norm,0.2841296928327645,0.013179442447653886,0 arc_easy,acc,0.5660774410774411,0.01016979577046211,0 arc_easy,acc_norm,0.49873737373737376,0.010259750807991153,0 boolq,acc,0.5685015290519878,0.008662594569027309,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.20571590265987547,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.44453296156144195,0.004958983318274571,0 hellaswag,acc_norm,0.5728938458474407,0.004936470085238491,0 piqa,acc,0.7404787812840044,0.010227939888173922,0 piqa,acc_norm,0.7459194776931447,0.010157271999135051,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.802,0.01260773393417531,0 sciq,acc_norm,0.706,0.014414290540008215,0 storycloze_2016,acc,0.7017637626937466,0.010579249795578815,0 winogrande,acc,0.5619573796369376,0.013944181296470804,0