task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456734,0 anli_r2,acc,0.344,0.015029633724408947,0 anli_r3,acc,0.355,0.013819249004047301,0 arc_challenge,acc,0.29948805460750855,0.013385021637313558,0 arc_challenge,acc_norm,0.3293515358361775,0.013734057652635473,0 arc_easy,acc,0.648989898989899,0.009793703885101047,0 arc_easy,acc_norm,0.6452020202020202,0.009817629113069696,0 boolq,acc,0.608868501529052,0.008535239054221166,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.42649620505163616,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.4567815176259709,0.0049711062650465545,0 hellaswag,acc_norm,0.602867954590719,0.004883037758919963,0 piqa,acc,0.7410228509249184,0.010220966031405609,0 piqa,acc_norm,0.7486398258977149,0.01012115601681925,0 rte,acc,0.5379061371841155,0.030009848912529117,0 sciq,acc,0.917,0.008728527206074789,0 sciq,acc_norm,0.921,0.00853415677333344,0 storycloze_2016,acc,0.7103153393907001,0.01048980809194661,0 winogrande,acc,0.579321231254933,0.0138745263720083,0