task,metric,value,err,version anli_r1,acc,0.324,0.01480686473373886,0 anli_r2,acc,0.329,0.014865395385928359,0 anli_r3,acc,0.3333333333333333,0.01361395001022561,0 arc_challenge,acc,0.29948805460750855,0.013385021637313565,0 arc_challenge,acc_norm,0.3267918088737201,0.01370666597558734,0 arc_easy,acc,0.6384680134680135,0.00985850654316206,0 arc_easy,acc_norm,0.625,0.009933992677987828,0 boolq,acc,0.6311926605504588,0.008438656079759072,1 cb,acc,0.3392857142857143,0.06384226561930825,1 cb,f1,0.33391833391833387,,1 copa,acc,0.82,0.03861229196653697,0 hellaswag,acc,0.4781915952997411,0.004985032806802436,0 hellaswag,acc_norm,0.6330412268472416,0.004809901151234833,0 piqa,acc,0.7568008705114254,0.010009611953858917,0 piqa,acc_norm,0.7589771490750816,0.009979042717267315,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.913,0.008916866630745906,0 sciq,acc_norm,0.908,0.0091443763931511,0 storycloze_2016,acc,0.7295563869588455,0.010271810373331022,0 winogrande,acc,0.5927387529597474,0.013808654122417845,0