task,metric,value,err,version anli_r1,acc,0.321,0.014770821817934635,0 anli_r2,acc,0.334,0.01492201952373296,0 anli_r3,acc,0.32,0.013471620929769135,0 arc_challenge,acc,0.27303754266211605,0.01301933276263575,0 arc_challenge,acc_norm,0.30119453924914674,0.013406741767847626,0 arc_easy,acc,0.6043771043771043,0.010033741393430986,0 arc_easy,acc_norm,0.5778619528619529,0.01013462052459227,0 boolq,acc,0.617737003058104,0.008499149690449272,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.2988943957300801,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.45130452101175067,0.004966060995315068,0 hellaswag,acc_norm,0.5956980681139216,0.004897534686686327,0 piqa,acc,0.7377584330794341,0.01026250256517245,0 piqa,acc_norm,0.7442872687704026,0.01017869010945987,0 rte,acc,0.516245487364621,0.030080573208738064,0 sciq,acc,0.896,0.009658016218524305,0 sciq,acc_norm,0.863,0.010878848714333316,0 storycloze_2016,acc,0.6980224478888295,0.010616985436073357,0 winogrande,acc,0.5714285714285714,0.013908353814606686,0