task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270334,0 anli_r2,acc,0.352,0.015110404505648668,0 anli_r3,acc,0.34,0.013680495725767789,0 arc_challenge,acc,0.22696245733788395,0.01224049153613287,0 arc_challenge,acc_norm,0.2525597269624573,0.012696728980207704,0 arc_easy,acc,0.48063973063973064,0.01025208949116552,0 arc_easy,acc_norm,0.46675084175084175,0.010237073872130747,0 boolq,acc,0.43241590214067277,0.008664798701065797,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.23650793650793656,,1 copa,acc,0.67,0.04725815626252609,0 hellaswag,acc,0.3972316271659032,0.00488324657949666,0 hellaswag,acc_norm,0.4969129655447122,0.004989686307484565,0 piqa,acc,0.690424374319913,0.010786656752183345,0 piqa,acc_norm,0.6958650707290533,0.010733493335721314,0 rte,acc,0.51985559566787,0.030072723167317177,0 sciq,acc,0.746,0.013772206565168544,0 sciq,acc_norm,0.722,0.014174516461485256,0 storycloze_2016,acc,0.6515232495991449,0.011018717784788488,0 winogrande,acc,0.5059194948697711,0.014051500838485807,0