task,metric,value,err,version anli_r1,acc,0.338,0.014965960710224482,0 anli_r2,acc,0.379,0.01534909100222535,0 anli_r3,acc,0.35333333333333333,0.013804572162314937,0 arc_challenge,acc,0.29180887372013653,0.013284525292403506,0 arc_challenge,acc_norm,0.3054607508532423,0.013460080478002505,0 arc_easy,acc,0.6460437710437711,0.009812370644174426,0 arc_easy,acc_norm,0.6241582491582491,0.009938436373170616,0 boolq,acc,0.636085626911315,0.008414918909128852,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.2940620782726046,,1 copa,acc,0.82,0.03861229196653697,0 hellaswag,acc,0.476000796654053,0.004984030250507291,0 hellaswag,acc_norm,0.6342362079267079,0.004806593424942258,0 piqa,acc,0.7589771490750816,0.009979042717267314,0 piqa,acc_norm,0.7600652883569097,0.009963625892809545,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.918,0.008680515615523725,0 sciq,acc_norm,0.914,0.008870325962594766,0 storycloze_2016,acc,0.7284874398717264,0.010284547617192592,0 winogrande,acc,0.6101026045777427,0.013707547317008463,0