task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620342,0 anli_r2,acc,0.323,0.014794927843348644,0 anli_r3,acc,0.3441666666666667,0.013720551062295756,0 arc_challenge,acc,0.2721843003412969,0.013006600406423706,0 arc_challenge,acc_norm,0.3037542662116041,0.013438909184778764,0 arc_easy,acc,0.6056397306397306,0.010028176038393004,0 arc_easy,acc_norm,0.5606060606060606,0.010184134315437663,0 boolq,acc,0.5773700305810398,0.008639722698719023,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3261261261261261,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.4643497311292571,0.004977081808179424,0 hellaswag,acc_norm,0.6074487153953396,0.004873203269366301,0 piqa,acc,0.7535364526659413,0.010054810789671824,0 piqa,acc_norm,0.7595212187159956,0.009971345364651068,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.847,0.01138950045966553,0 sciq,acc_norm,0.792,0.012841374572096928,0 storycloze_2016,acc,0.7129877071084981,0.010460934115933261,0 winogrande,acc,0.5777426992896606,0.013881582030658549,0