task,metric,value,err,version anli_r1,acc,0.341,0.0149981313484027,0 anli_r2,acc,0.328,0.014853842487270336,0 anli_r3,acc,0.3425,0.013704669762934725,0 arc_challenge,acc,0.2713310580204778,0.0129938077275458,0 arc_challenge,acc_norm,0.29436860068259385,0.013318528460539424,0 arc_easy,acc,0.5917508417508418,0.010085566195791248,0 arc_easy,acc_norm,0.5517676767676768,0.010204645126856942,0 boolq,acc,0.6192660550458715,0.008492625561656213,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.24603174603174607,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.45140410276837284,0.004966158142645419,0 hellaswag,acc_norm,0.5957976498705437,0.004897340793314379,0 piqa,acc,0.7410228509249184,0.01022096603140561,0 piqa,acc_norm,0.749727965179543,0.01010656188008977,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.877,0.010391293421849874,0 sciq,acc_norm,0.83,0.01188449583454167,0 storycloze_2016,acc,0.6958845537145911,0.010638172655194796,0 winogrande,acc,0.5706393054459353,0.013911537499969165,0