task,metric,value,err,version anli_r1,acc,0.323,0.01479492784334864,0 anli_r2,acc,0.302,0.014526080235459548,0 anli_r3,acc,0.3375,0.013655897185463648,0 arc_challenge,acc,0.29180887372013653,0.01328452529240351,0 arc_challenge,acc_norm,0.33532423208191126,0.013796182947785562,0 arc_easy,acc,0.6456228956228957,0.00981500403025175,0 arc_easy,acc_norm,0.6506734006734006,0.0097828534493993,0 boolq,acc,0.6223241590214067,0.008479309208281643,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3081617086193746,,1 copa,acc,0.73,0.044619604333847394,0 hellaswag,acc,0.45269866560446126,0.004967402792744857,0 hellaswag,acc_norm,0.601274646484764,0.004886353563571844,0 piqa,acc,0.7388465723612623,0.010248738649935581,0 piqa,acc_norm,0.7459194776931447,0.010157271999135055,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.931,0.00801893405031515,0 sciq,acc_norm,0.936,0.007743640226919298,0 storycloze_2016,acc,0.7097808658471406,0.010495529690730063,0 winogrande,acc,0.569060773480663,0.01391779662333596,0