task,metric,value,err,version anli_r1,acc,0.342,0.01500870618212173,0 anli_r2,acc,0.346,0.015050266127564441,0 anli_r3,acc,0.3408333333333333,0.013688600793296939,0 arc_challenge,acc,0.2841296928327645,0.013179442447653886,0 arc_challenge,acc_norm,0.29692832764505117,0.013352025976725223,0 arc_easy,acc,0.6224747474747475,0.009947227833469435,0 arc_easy,acc_norm,0.5437710437710438,0.01022039438372202,0 boolq,acc,0.6165137614678899,0.008504304838837027,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.24418093983311376,,1 copa,acc,0.78,0.04163331998932262,0 hellaswag,acc,0.4781915952997411,0.004985032806802436,0 hellaswag,acc_norm,0.6294562836088429,0.004819633668832535,0 piqa,acc,0.7519042437431991,0.010077118315574719,0 piqa,acc_norm,0.7611534276387377,0.009948120385337485,0 rte,acc,0.5487364620938628,0.029953149241808943,0 sciq,acc,0.848,0.011358918303475279,0 sciq,acc_norm,0.76,0.013512312258920831,0 storycloze_2016,acc,0.7247461250668092,0.010328538400500567,0 winogrande,acc,0.6045777426992897,0.013741678387545348,0