task,metric,value,err,version anli_r1,acc,0.323,0.014794927843348633,0 anli_r2,acc,0.317,0.014721675438880236,0 anli_r3,acc,0.3625,0.013883037874225516,0 arc_challenge,acc,0.2790102389078498,0.013106784883601333,0 arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0 arc_easy,acc,0.5942760942760943,0.010075755540128873,0 arc_easy,acc_norm,0.5757575757575758,0.010141333654958552,0 boolq,acc,0.5755351681957187,0.008644688121685498,1 cb,acc,0.35714285714285715,0.06460957383809221,1 cb,f1,0.19573820395738203,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4592710615415256,0.004973199296339971,0 hellaswag,acc_norm,0.6106353316072496,0.00486609688094144,0 piqa,acc,0.7540805223068553,0.010047331865625194,0 piqa,acc_norm,0.7589771490750816,0.009979042717267314,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.835,0.01174363286691616,0 sciq,acc_norm,0.788,0.01293148186493805,0 storycloze_2016,acc,0.7194013896312133,0.01038980964728882,0 winogrande,acc,0.585635359116022,0.013844846232268565,0