task,metric,value,err,version anli_r1,acc,0.313,0.014671272822977885,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.3491666666666667,0.01376707539507725,0 arc_challenge,acc,0.2764505119453925,0.013069662474252425,0 arc_challenge,acc_norm,0.2986348122866894,0.013374078615068752,0 arc_easy,acc,0.6346801346801347,0.009880576614806928,0 arc_easy,acc_norm,0.6254208754208754,0.009931758820410629,0 boolq,acc,0.618348623853211,0.008496550741178254,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.2576489533011272,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.45200159330810596,0.004966736811010493,0 hellaswag,acc_norm,0.5935072694682334,0.004901747426331751,0 piqa,acc,0.7486398258977149,0.010121156016819255,0 piqa,acc_norm,0.750816104461371,0.01009188277012021,0 rte,acc,0.516245487364621,0.030080573208738064,0 sciq,acc,0.898,0.009575368801653892,0 sciq,acc_norm,0.902,0.009406619184621228,0 storycloze_2016,acc,0.6953500801710315,0.0106434269886468,0 winogrande,acc,0.5666929755327546,0.01392691505275734,0