task,metric,value,err,version anli_r1,acc,0.333,0.01491084616422986,0 anli_r2,acc,0.332,0.014899597242811478,0 anli_r3,acc,0.33916666666666667,0.013672343491681822,0 arc_challenge,acc,0.35665529010238906,0.013998056902620203,0 arc_challenge,acc_norm,0.37627986348122866,0.014157022555407173,0 arc_easy,acc,0.7028619528619529,0.009377397867796849,0 arc_easy,acc_norm,0.6771885521885522,0.009593950220366737,0 boolq,acc,0.6486238532110091,0.00834978197660316,1 cb,acc,0.14285714285714285,0.04718416136255829,1 cb,f1,0.14017094017094014,,1 copa,acc,0.86,0.034873508801977725,0 hellaswag,acc,0.5324636526588329,0.004979252954977319,0 hellaswag,acc_norm,0.7127066321449911,0.004515748192605716,0 piqa,acc,0.764961915125136,0.00989314668880531,0 piqa,acc_norm,0.7840043525571273,0.009601236303553544,0 rte,acc,0.4981949458483754,0.030096267148976626,0 sciq,acc,0.938,0.007629823996280306,0 sciq,acc_norm,0.93,0.008072494358323508,0 storycloze_2016,acc,0.7546766435061465,0.009950137914623096,0 winogrande,acc,0.6195737963693765,0.013644727908656833,0