task,metric,value,err,version anli_r1,acc,0.324,0.014806864733738863,0 anli_r2,acc,0.373,0.01530049362292281,0 anli_r3,acc,0.3475,0.013751753243291852,0 arc_challenge,acc,0.2815699658703072,0.013143376735009026,0 arc_challenge,acc_norm,0.3225255972696246,0.01365998089427737,0 arc_easy,acc,0.6216329966329966,0.009951575683331949,0 arc_easy,acc_norm,0.6018518518518519,0.010044662374653396,0 boolq,acc,0.617125382262997,0.008501734385335953,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.466241360978203,,1 copa,acc,0.8,0.04020151261036845,0 hellaswag,acc,0.47231627165903206,0.004982127315605219,0 hellaswag,acc_norm,0.6231826329416451,0.004835981632401606,0 piqa,acc,0.7540805223068553,0.010047331865625193,0 piqa,acc_norm,0.7627856365614799,0.009924694933586371,0 rte,acc,0.6028880866425993,0.029452371378346828,0 sciq,acc,0.905,0.009276910103103286,0 sciq,acc_norm,0.891,0.009859828407037186,0 storycloze_2016,acc,0.7231427044361304,0.01034711289027692,0 winogrande,acc,0.5840568271507498,0.013852485356798252,0