task,metric,value,err,version anli_r1,acc,0.34,0.014987482264363937,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.35583333333333333,0.013826518748493314,0 arc_challenge,acc,0.2687713310580205,0.012955065963710691,0 arc_challenge,acc_norm,0.29948805460750855,0.013385021637313574,0 arc_easy,acc,0.577020202020202,0.010137328382209094,0 arc_easy,acc_norm,0.5315656565656566,0.010239317603199507,0 boolq,acc,0.598776758409786,0.008572708337178997,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.40095238095238095,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4733120892252539,0.00498266845211894,0 hellaswag,acc_norm,0.6218880701055567,0.0048392473326060465,0 piqa,acc,0.7578890097932536,0.009994371269104381,0 piqa,acc_norm,0.7633297062023939,0.009916841655042809,0 rte,acc,0.592057761732852,0.029581952519606193,0 sciq,acc,0.837,0.011686212712746839,0 sciq,acc_norm,0.788,0.012931481864938034,0 storycloze_2016,acc,0.7012292891501871,0.010584692134739969,0 winogrande,acc,0.580110497237569,0.013870943986310393,0