task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932577,0 anli_r2,acc,0.337,0.014955087918653609,0 anli_r3,acc,0.3325,0.013605417345710528,0 arc_challenge,acc,0.29948805460750855,0.013385021637313563,0 arc_challenge,acc_norm,0.3361774744027304,0.013804855026205763,0 arc_easy,acc,0.6380471380471381,0.00986099146668847,0 arc_easy,acc_norm,0.6216329966329966,0.00995157568333195,0 boolq,acc,0.6192660550458715,0.008492625561656215,1 cb,acc,0.23214285714285715,0.056929390240001085,1 cb,f1,0.23148148148148148,,1 copa,acc,0.84,0.0368452949177471,0 hellaswag,acc,0.47470623381796456,0.004983392650570959,0 hellaswag,acc_norm,0.6319458275243975,0.004812905279066442,0 piqa,acc,0.7529923830250272,0.010062268140772625,0 piqa,acc_norm,0.7568008705114254,0.010009611953858917,0 rte,acc,0.47653429602888087,0.030063300411902652,0 sciq,acc,0.915,0.008823426366942323,0 sciq,acc_norm,0.91,0.009054390204866444,0 storycloze_2016,acc,0.721004810261892,0.010371620932652795,0 winogrande,acc,0.6108918705603789,0.013702520871485949,0