task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270333,0 anli_r2,acc,0.351,0.015100563798316409,0 anli_r3,acc,0.34833333333333333,0.013759437498874086,0 arc_challenge,acc,0.30119453924914674,0.01340674176784762,0 arc_challenge,acc_norm,0.31143344709897613,0.013532472099850947,0 arc_easy,acc,0.6165824915824916,0.009976995068264717,0 arc_easy,acc_norm,0.6060606060606061,0.010026305355981814,0 boolq,acc,0.6058103975535168,0.008546995661233634,1 cb,acc,0.4107142857142857,0.06633634150359541,1 cb,f1,0.3815668202764977,,1 copa,acc,0.8,0.04020151261036845,0 hellaswag,acc,0.46863174666401114,0.004979952166595543,0 hellaswag,acc_norm,0.6213901613224457,0.0048404936031661945,0 piqa,acc,0.7611534276387377,0.0099481203853375,0 piqa,acc_norm,0.7600652883569097,0.009963625892809545,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.903,0.009363689373248088,0 sciq,acc_norm,0.891,0.009859828407037185,0 storycloze_2016,acc,0.7258150721539284,0.010316062787590006,0 winogrande,acc,0.574585635359116,0.013895257666646378,0