task,metric,value,err,version anli_r1,acc,0.334,0.01492201952373296,0 anli_r2,acc,0.341,0.0149981313484027,0 anli_r3,acc,0.3358333333333333,0.013639261190932882,0 arc_challenge,acc,0.3148464163822526,0.013572657703084948,0 arc_challenge,acc_norm,0.34897610921501704,0.013928933461382496,0 arc_easy,acc,0.6599326599326599,0.009720765494805276,0 arc_easy,acc_norm,0.5984848484848485,0.010058790020755562,0 boolq,acc,0.5951070336391437,0.008585393347962317,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.18803418803418803,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.5300736904999004,0.004980747448813311,0 hellaswag,acc_norm,0.7024497112129058,0.004562462665505219,0 piqa,acc,0.779107725788901,0.009679088048842217,0 piqa,acc_norm,0.7878128400435256,0.009539299828174044,0 rte,acc,0.5667870036101083,0.029826764082138274,0 sciq,acc,0.894,0.009739551265785141,0 sciq,acc_norm,0.828,0.011939788882495321,0 storycloze_2016,acc,0.7541421699625869,0.009957443066942233,0 winogrande,acc,0.6227308602999211,0.013622567928799503,0