task,metric,value,err,version anli_r1,acc,0.342,0.015008706182121731,0 anli_r2,acc,0.329,0.014865395385928367,0 anli_r3,acc,0.32666666666666666,0.013544340907003663,0 arc_challenge,acc,0.29180887372013653,0.013284525292403492,0 arc_challenge,acc_norm,0.31569965870307165,0.013582571095815291,0 arc_easy,acc,0.6266835016835017,0.00992500914280291,0 arc_easy,acc_norm,0.6153198653198653,0.009983171707009,0 boolq,acc,0.6162079510703364,0.008505584729104973,1 cb,acc,0.5892857142857143,0.0663363415035954,1 cb,f1,0.4120234604105572,,1 copa,acc,0.77,0.04229525846816507,0 hellaswag,acc,0.4666401115315674,0.004978662946687269,0 hellaswag,acc_norm,0.6238797052380004,0.004834207964061318,0 piqa,acc,0.750816104461371,0.010091882770120216,0 piqa,acc_norm,0.7611534276387377,0.009948120385337484,0 rte,acc,0.5018050541516246,0.030096267148976626,0 sciq,acc,0.907,0.00918887563499668,0 sciq,acc_norm,0.9,0.00949157995752505,0 storycloze_2016,acc,0.7242116515232496,0.010334748387645674,0 winogrande,acc,0.590370955011839,0.013821049109655483,0