task,metric,value,err,version anli_r1,acc,0.327,0.01484221315341124,0 anli_r2,acc,0.332,0.014899597242811492,0 anli_r3,acc,0.34,0.01368049572576779,0 arc_challenge,acc,0.28071672354948807,0.013131238126975586,0 arc_challenge,acc_norm,0.31313993174061433,0.013552671543623504,0 arc_easy,acc,0.6031144781144782,0.010039236800583209,0 arc_easy,acc_norm,0.5723905723905723,0.010151683397430673,0 boolq,acc,0.5788990825688073,0.008635491562221344,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.35057471264367807,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4731129257120096,0.0049825618152141244,0 hellaswag,acc_norm,0.6270663214499104,0.004825963768772216,0 piqa,acc,0.7589771490750816,0.009979042717267314,0 piqa,acc_norm,0.7616974972796517,0.009940334245876219,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.892,0.009820001651345696,0 sciq,acc_norm,0.89,0.009899393819724446,0 storycloze_2016,acc,0.7140566541956174,0.010449259851345842,0 winogrande,acc,0.574585635359116,0.013895257666646378,0