task,metric,value,err,version anli_r1,acc,0.323,0.014794927843348637,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.33666666666666667,0.013647602942406389,0 arc_challenge,acc,0.2713310580204778,0.012993807727545801,0 arc_challenge,acc_norm,0.3054607508532423,0.01346008047800251,0 arc_easy,acc,0.6321548821548821,0.009894923464455196,0 arc_easy,acc_norm,0.5795454545454546,0.010129114278546524,0 boolq,acc,0.6235474006116208,0.008473882279194588,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.32592592592592595,,1 copa,acc,0.74,0.04408440022768077,0 hellaswag,acc,0.4780920135431189,0.004984989320648131,0 hellaswag,acc_norm,0.6283608842859988,0.004822550638450904,0 piqa,acc,0.7546245919477693,0.0100398313204224,0 piqa,acc_norm,0.7622415669205659,0.009932525779525489,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.905,0.009276910103103326,0 sciq,acc_norm,0.873,0.010534798620855759,0 storycloze_2016,acc,0.7135221806520577,0.01045510591863303,0 winogrande,acc,0.5895816890292028,0.013825107120035863,0