task,metric,value,err,version anli_r1,acc,0.311,0.014645596385722695,0 anli_r2,acc,0.313,0.014671272822977886,0 anli_r3,acc,0.33166666666666667,0.013596836729485156,0 arc_challenge,acc,0.2568259385665529,0.0127669237941168,0 arc_challenge,acc_norm,0.30119453924914674,0.01340674176784762,0 arc_easy,acc,0.5555555555555556,0.01019625483869168,0 arc_easy,acc_norm,0.5366161616161617,0.01023223506393303,0 boolq,acc,0.6061162079510704,0.008545835792614982,1 cb,acc,0.3392857142857143,0.06384226561930828,1 cb,f1,0.23827865281885505,,1 copa,acc,0.8,0.04020151261036845,0 hellaswag,acc,0.46036646086436966,0.0049740806383642665,0 hellaswag,acc_norm,0.6048595897231627,0.00487881696101204,0 piqa,acc,0.719804134929271,0.010478122015577082,0 piqa,acc_norm,0.7181719260065288,0.010496675231258159,0 rte,acc,0.4981949458483754,0.030096267148976633,0 sciq,acc,0.852,0.011234866364235239,0 sciq,acc_norm,0.834,0.011772110370812185,0 storycloze_2016,acc,0.6755745590593266,0.01082613134499089,0 winogrande,acc,0.5580110497237569,0.013957584079109001,0