task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411249,0 anli_r2,acc,0.332,0.014899597242811483,0 anli_r3,acc,0.3416666666666667,0.013696658778002512,0 arc_challenge,acc,0.17064846416382254,0.010993654168413735,0 arc_challenge,acc_norm,0.2150170648464164,0.01200571763413361,0 arc_easy,acc,0.34595959595959597,0.009760749624427521,0 arc_easy,acc_norm,0.3371212121212121,0.009700146509130083,0 boolq,acc,0.5938837920489297,0.00858951094378741,1 cb,acc,0.44642857142857145,0.06703189227942397,1 cb,f1,0.3083804143126177,,1 copa,acc,0.52,0.050211673156867795,0 hellaswag,acc,0.26329416450906196,0.004395205528158076,0 hellaswag,acc_norm,0.26926906990639315,0.004426734718808876,0 piqa,acc,0.5696409140369967,0.011552114834700509,0 piqa,acc_norm,0.5647442872687704,0.011567608588759421,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.628,0.015292149942040577,0 sciq,acc_norm,0.576,0.01563548747140519,0 storycloze_2016,acc,0.5200427578834848,0.011553138977961012,0 winogrande,acc,0.505130228887135,0.014051745961790516,0