task,metric,value,err,version anli_r1,acc,0.348,0.015070604603768408,0 anli_r2,acc,0.325,0.014818724459095526,0 anli_r3,acc,0.3433333333333333,0.01371263383046586,0 arc_challenge,acc,0.25341296928327645,0.012710896778378607,0 arc_challenge,acc_norm,0.2790102389078498,0.013106784883601341,0 arc_easy,acc,0.5058922558922558,0.010259071083844221,0 arc_easy,acc_norm,0.4970538720538721,0.010259605416237574,0 boolq,acc,0.5235474006116208,0.008735351675636606,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3080848777867311,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.3981278629755029,0.004885116465550268,0 hellaswag,acc_norm,0.4869547898824935,0.00498808282521327,0 piqa,acc,0.6735582154515778,0.0109404670461773,0 piqa,acc_norm,0.6833514689880305,0.010853160531978484,0 rte,acc,0.4693140794223827,0.030039730592197812,0 sciq,acc,0.761,0.013493000446937594,0 sciq,acc_norm,0.745,0.013790038620872833,0 storycloze_2016,acc,0.6451095670764297,0.01106478765990412,0 winogrande,acc,0.4980268350434096,0.01405237625922564,0