task,metric,value,err,version anli_r1,acc,0.309,0.014619600977206493,0 anli_r2,acc,0.325,0.014818724459095524,0 anli_r3,acc,0.32916666666666666,0.013570806258433625,0 arc_challenge,acc,0.27986348122866894,0.01311904089772592,0 arc_challenge,acc_norm,0.3191126279863481,0.013621696119173304,0 arc_easy,acc,0.6292087542087542,0.009911292822056923,0 arc_easy,acc_norm,0.617003367003367,0.009974920384536482,0 boolq,acc,0.6305810397553517,0.008441557531799614,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3338164251207729,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4766978689504083,0.004984359669951929,0 hellaswag,acc_norm,0.6308504282015535,0.004815882719278398,0 piqa,acc,0.7611534276387377,0.009948120385337494,0 piqa,acc_norm,0.7665941240478781,0.009869247889520993,0 rte,acc,0.5054151624548736,0.030094698123239966,0 sciq,acc,0.902,0.009406619184621252,0 sciq,acc_norm,0.885,0.01009340759490462,0 storycloze_2016,acc,0.7252805986103688,0.010322309878339504,0 winogrande,acc,0.5832675611681136,0.013856250072796318,0