task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.326,0.014830507204541033,0 anli_r3,acc,0.3458333333333333,0.013736245342311014,0 arc_challenge,acc,0.3165529010238908,0.01359243151906808,0 arc_challenge,acc_norm,0.3370307167235495,0.013813476652902274,0 arc_easy,acc,0.6426767676767676,0.009833205612463114,0 arc_easy,acc_norm,0.6426767676767676,0.009833205612463106,0 boolq,acc,0.5801223241590214,0.008632045504781744,1 cb,acc,0.5178571428571429,0.06737697508644648,1 cb,f1,0.33534439416792355,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.45030870344552876,0.0049650784774355715,0 hellaswag,acc_norm,0.60017924716192,0.004888601874547486,0 piqa,acc,0.7584330794341676,0.009986718001804461,0 piqa,acc_norm,0.7600652883569097,0.009963625892809545,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.932,0.007964887911291603,0 sciq,acc_norm,0.929,0.008125578442487914,0 storycloze_2016,acc,0.7012292891501871,0.010584692134739974,0 winogrande,acc,0.5674822415153907,0.013923911578623827,0