task,metric,value,err,version anli_r1,acc,0.317,0.01472167543888022,0 anli_r2,acc,0.328,0.014853842487270334,0 anli_r3,acc,0.33166666666666667,0.01359683672948517,0 arc_challenge,acc,0.3310580204778157,0.013752062419817836,0 arc_challenge,acc_norm,0.3583617747440273,0.014012883334859871,0 arc_easy,acc,0.680976430976431,0.009564133249441073,0 arc_easy,acc_norm,0.6616161616161617,0.009709034670525096,0 boolq,acc,0.6626911314984709,0.008269171495741617,1 cb,acc,0.19642857142857142,0.05357142857142859,1 cb,f1,0.1984379958880104,,1 copa,acc,0.82,0.03861229196653697,0 hellaswag,acc,0.530372435769767,0.004980566907790449,0 hellaswag,acc_norm,0.7078271260705039,0.004538319464111969,0 piqa,acc,0.7725788900979326,0.009779850767847239,0 piqa,acc_norm,0.7812840043525572,0.009644731932667563,0 rte,acc,0.5884476534296029,0.0296218322224172,0 sciq,acc,0.945,0.007212976294639238,0 sciq,acc_norm,0.934,0.007855297938697587,0 storycloze_2016,acc,0.757883484767504,0.009905870033193863,0 winogrande,acc,0.6298342541436464,0.013570454689603911,0