task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620347,0 anli_r2,acc,0.322,0.014782913600996664,0 anli_r3,acc,0.35333333333333333,0.013804572162314925,0 arc_challenge,acc,0.28498293515358364,0.013191348179838793,0 arc_challenge,acc_norm,0.310580204778157,0.01352229209805305,0 arc_easy,acc,0.6195286195286195,0.00996230599205857,0 arc_easy,acc_norm,0.6136363636363636,0.009991296778159615,0 boolq,acc,0.5290519877675841,0.00873028052845153,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.25089094796863864,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4671380203146783,0.004978992721242829,0 hellaswag,acc_norm,0.6250746863174667,0.004831142570475509,0 piqa,acc,0.7453754080522307,0.01016443223706049,0 piqa,acc_norm,0.7595212187159956,0.009971345364651066,0 rte,acc,0.5018050541516246,0.030096267148976626,0 sciq,acc,0.906,0.009233052000787736,0 sciq,acc_norm,0.894,0.009739551265785133,0 storycloze_2016,acc,0.7252805986103688,0.010322309878339502,0 winogrande,acc,0.5832675611681136,0.01385625007279632,0