task,metric,value,err,version anli_r1,acc,0.347,0.015060472031706622,0 anli_r2,acc,0.361,0.015195720118175127,0 anli_r3,acc,0.34,0.013680495725767787,0 arc_challenge,acc,0.30119453924914674,0.01340674176784762,0 arc_challenge,acc_norm,0.3370307167235495,0.013813476652902269,0 arc_easy,acc,0.6283670033670034,0.009915897123658788,0 arc_easy,acc_norm,0.5972222222222222,0.010063960494989163,0 boolq,acc,0.5984709480122324,0.008573784490094754,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.266719222178426,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.47400916152160927,0.004983035420235718,0 hellaswag,acc_norm,0.6275642302330213,0.004824655406075563,0 piqa,acc,0.7546245919477693,0.010039831320422396,0 piqa,acc_norm,0.766050054406964,0.009877236895137446,0 rte,acc,0.4981949458483754,0.030096267148976626,0 sciq,acc,0.911,0.009008893392651518,0 sciq,acc_norm,0.898,0.009575368801653892,0 storycloze_2016,acc,0.7247461250668092,0.010328538400500567,0 winogrande,acc,0.584846093133386,0.013848684086658585,0