task,metric,value,err,version anli_r1,acc,0.305,0.014566646394664387,0 anli_r2,acc,0.317,0.014721675438880213,0 anli_r3,acc,0.335,0.013630871843821477,0 arc_challenge,acc,0.29436860068259385,0.013318528460539427,0 arc_challenge,acc_norm,0.3097269624573379,0.013512058415238361,0 arc_easy,acc,0.6195286195286195,0.009962305992058584,0 arc_easy,acc_norm,0.5904882154882155,0.010090368160990062,0 boolq,acc,0.6314984709480123,0.00843719989350296,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.37216374269005853,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4765982871937861,0.004984313205791443,0 hellaswag,acc_norm,0.6290579565823541,0.004820697457420433,0 piqa,acc,0.7627856365614799,0.009924694933586376,0 piqa,acc_norm,0.7584330794341676,0.009986718001804444,0 rte,acc,0.555956678700361,0.029907396333795987,0 sciq,acc,0.905,0.009276910103103322,0 sciq,acc_norm,0.876,0.01042749887234396,0 storycloze_2016,acc,0.7172634954569749,0.01041380648612127,0 winogrande,acc,0.5816890292028414,0.013863669961195904,0