task,metric,value,err,version anli_r1,acc,0.334,0.01492201952373296,0 anli_r2,acc,0.335,0.014933117490932572,0 anli_r3,acc,0.33166666666666667,0.01359683672948516,0 arc_challenge,acc,0.22525597269624573,0.01220783999540731,0 arc_challenge,acc_norm,0.25426621160409557,0.012724999945157738,0 arc_easy,acc,0.5168350168350169,0.010253966261288907,0 arc_easy,acc_norm,0.45454545454545453,0.010217299762709433,0 boolq,acc,0.5415902140672783,0.00871474901770989,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.36666002788289187,0.004809077205343495,0 hellaswag,acc_norm,0.4478191595299741,0.004962534264751926,0 piqa,acc,0.6931447225244831,0.010760295070580359,0 piqa,acc_norm,0.6942328618063112,0.010749627366141646,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.809,0.012436787112179467,0 sciq,acc_norm,0.711,0.01434171135829618,0 storycloze_2016,acc,0.6493853554249065,0.011034317290463292,0 winogrande,acc,0.5382794001578532,0.014011242594964115,0