task,metric,value,err,version anli_r1,acc,0.367,0.01524937846417175,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.3425,0.013704669762934722,0 arc_challenge,acc,0.28498293515358364,0.013191348179838795,0 arc_challenge,acc_norm,0.3191126279863481,0.013621696119173304,0 arc_easy,acc,0.6056397306397306,0.010028176038393007,0 arc_easy,acc_norm,0.5812289562289562,0.010123487160167819,0 boolq,acc,0.6192660550458715,0.008492625561656217,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.3487396784006953,,1 copa,acc,0.71,0.04560480215720684,0 hellaswag,acc,0.46265684126667994,0.004975845335086618,0 hellaswag,acc_norm,0.6078470424218283,0.004872326888655505,0 piqa,acc,0.7453754080522307,0.01016443223706048,0 piqa,acc_norm,0.7453754080522307,0.010164432237060492,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.865,0.010811655372416051,0 sciq,acc_norm,0.851,0.01126614068463217,0 storycloze_2016,acc,0.7113842864778194,0.010478311785642947,0 winogrande,acc,0.5572217837411207,0.013960157350784985,0