task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620345,0 anli_r2,acc,0.329,0.014865395385928364,0 anli_r3,acc,0.33916666666666667,0.013672343491681822,0 arc_challenge,acc,0.24658703071672355,0.012595726268790127,0 arc_challenge,acc_norm,0.26621160409556316,0.012915774781523223,0 arc_easy,acc,0.49747474747474746,0.010259652668783469,0 arc_easy,acc_norm,0.4675925925925926,0.010238210368801886,0 boolq,acc,0.4782874617737003,0.008736805647519946,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.25267737617135205,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.4008165704043019,0.0048906236932436216,0 hellaswag,acc_norm,0.4962158932483569,0.004989638507409917,0 piqa,acc,0.6953210010881393,0.010738889044325161,0 piqa,acc_norm,0.7002176278563657,0.01068968696713809,0 rte,acc,0.44765342960288806,0.02993107036293953,0 sciq,acc,0.727,0.014095022868717598,0 sciq,acc_norm,0.68,0.014758652303574874,0 storycloze_2016,acc,0.6483164083377873,0.011042025772682543,0 winogrande,acc,0.5185477505919495,0.014042813708888378,0