task,metric,value,err,version anli_r1,acc,0.316,0.014709193056057121,0 anli_r2,acc,0.347,0.015060472031706622,0 anli_r3,acc,0.3308333333333333,0.013588208070709007,0 arc_challenge,acc,0.29948805460750855,0.013385021637313562,0 arc_challenge,acc_norm,0.31569965870307165,0.013582571095815291,0 arc_easy,acc,0.6321548821548821,0.00989492346445519,0 arc_easy,acc_norm,0.6123737373737373,0.009997307914447612,0 boolq,acc,0.6030581039755352,0.008557276964675146,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.39707602339181286,,1 copa,acc,0.81,0.03942772444036623,0 hellaswag,acc,0.47211710814578767,0.004982016702445961,0 hellaswag,acc_norm,0.6292571200955985,0.004820166002253063,0 piqa,acc,0.7611534276387377,0.0099481203853375,0 piqa,acc_norm,0.7611534276387377,0.009948120385337484,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.915,0.008823426366942314,0 sciq,acc_norm,0.909,0.009099549538400241,0 storycloze_2016,acc,0.7258150721539284,0.010316062787590011,0 winogrande,acc,0.5943172849250198,0.013800206336014208,0