task,metric,value,err,version anli_r1,acc,0.293,0.014399942998441271,0 anli_r2,acc,0.327,0.01484221315341124,0 anli_r3,acc,0.3383333333333333,0.013664144006618275,0 arc_challenge,acc,0.33447098976109213,0.013787460322441384,0 arc_challenge,acc_norm,0.3438566552901024,0.01388064457015621,0 arc_easy,acc,0.6759259259259259,0.009603728850095394,0 arc_easy,acc_norm,0.640993265993266,0.009843424713072176,0 boolq,acc,0.6669724770642201,0.00824302391268888,1 cb,acc,0.32142857142857145,0.06297362289056341,1 cb,f1,0.2706949089557785,,1 copa,acc,0.77,0.042295258468165065,0 hellaswag,acc,0.5265883290181239,0.0049827214724073405,0 hellaswag,acc_norm,0.7029476199960167,0.00456025908319738,0 piqa,acc,0.7763873775843307,0.009721489519176294,0 piqa,acc_norm,0.7883569096844396,0.009530351270479392,0 rte,acc,0.5595667870036101,0.029882123363118723,0 sciq,acc,0.928,0.008178195576218681,0 sciq,acc_norm,0.911,0.009008893392651523,0 storycloze_2016,acc,0.7413148049171566,0.010126662138021714,0 winogrande,acc,0.6243093922651933,0.013611257508380444,0