task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411249,0 anli_r2,acc,0.35,0.015090650341444233,0 anli_r3,acc,0.325,0.013526454480351025,0 arc_challenge,acc,0.2841296928327645,0.013179442447653886,0 arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0 arc_easy,acc,0.6372053872053872,0.009865936757013938,0 arc_easy,acc_norm,0.6077441077441077,0.010018744689650043,0 boolq,acc,0.6342507645259939,0.00842393006885078,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.26894586894586897,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.4736108344951205,0.0049828269166871525,0 hellaswag,acc_norm,0.6298546106353317,0.004818566366066934,0 piqa,acc,0.7611534276387377,0.0099481203853375,0 piqa,acc_norm,0.7584330794341676,0.009986718001804451,0 rte,acc,0.5487364620938628,0.029953149241808943,0 sciq,acc,0.911,0.009008893392651528,0 sciq,acc_norm,0.885,0.010093407594904638,0 storycloze_2016,acc,0.7188669160876536,0.010395836091628108,0 winogrande,acc,0.585635359116022,0.013844846232268563,0