task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928364,0 anli_r2,acc,0.326,0.01483050720454104,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.26706484641638223,0.012928933196496354,0 arc_challenge,acc_norm,0.28924914675767915,0.013250012579393443,0 arc_easy,acc,0.6014309764309764,0.010046455400477937,0 arc_easy,acc_norm,0.531986531986532,0.010238767643185714,0 boolq,acc,0.5825688073394495,0.00862499005021668,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.2127316680096696,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.47400916152160927,0.00498303542023572,0 hellaswag,acc_norm,0.6269667396932882,0.004826224784850451,0 piqa,acc,0.7551686615886833,0.010032309105568795,0 piqa,acc_norm,0.763873775843308,0.009908965890558218,0 rte,acc,0.5595667870036101,0.02988212336311872,0 sciq,acc,0.85,0.011297239823409314,0 sciq,acc_norm,0.752,0.013663187134877637,0 storycloze_2016,acc,0.7167290219134153,0.010419760409155363,0 winogrande,acc,0.5864246250986582,0.013840971763195304,0