task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456732,0 anli_r2,acc,0.333,0.014910846164229857,0 anli_r3,acc,0.3425,0.013704669762934727,0 arc_challenge,acc,0.26109215017064846,0.012835523909473848,0 arc_challenge,acc_norm,0.28924914675767915,0.013250012579393441,0 arc_easy,acc,0.5900673400673401,0.01009195352750625,0 arc_easy,acc_norm,0.5538720538720538,0.010200057828765008,0 boolq,acc,0.5706422018348624,0.008657333755353679,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.31768388106416273,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.45439155546703847,0.004968979259738328,0 hellaswag,acc_norm,0.5930093606851224,0.004902690765066431,0 piqa,acc,0.7383025027203483,0.01025563077270823,0 piqa,acc_norm,0.7415669205658324,0.010213971636773315,0 rte,acc,0.5487364620938628,0.029953149241808946,0 sciq,acc,0.865,0.010811655372416051,0 sciq,acc_norm,0.837,0.01168621271274684,0 storycloze_2016,acc,0.692143238909674,0.010674598158758177,0 winogrande,acc,0.5666929755327546,0.013926915052757352,0