task,metric,value,err,version anli_r1,acc,0.286,0.014297146862517908,0 anli_r2,acc,0.304,0.01455320568795044,0 anli_r3,acc,0.3175,0.013443538681348054,0 arc_challenge,acc,0.26535836177474403,0.012902554762313969,0 arc_challenge,acc_norm,0.2909556313993174,0.013273077865907573,0 arc_easy,acc,0.5122053872053872,0.010256726235129016,0 arc_easy,acc_norm,0.4877946127946128,0.01025672623512901,0 boolq,acc,0.6862385321100918,0.008115773046958279,1 cb,acc,0.35714285714285715,0.06460957383809221,1 cb,f1,0.27666815942678014,,1 copa,acc,0.69,0.04648231987117316,0 hellaswag,acc,0.4907388966341366,0.004988925410522774,0 hellaswag,acc_norm,0.5834495120493925,0.00491979470467327,0 piqa,acc,0.6605005440696409,0.011048455047173918,0 piqa,acc_norm,0.6534276387377584,0.011103020320872166,0 rte,acc,0.5631768953068592,0.029855247390314945,0 sciq,acc,0.905,0.009276910103103324,0 sciq,acc_norm,0.872,0.010570133761108658,0 storycloze_2016,acc,0.5879208979155531,0.011382271506935862,0 winogrande,acc,0.5177584846093133,0.014043619596174966,0