task,metric,value,err,version anli_r1,acc,0.304,0.014553205687950434,0 anli_r2,acc,0.332,0.014899597242811482,0 anli_r3,acc,0.34833333333333333,0.013759437498874061,0 arc_challenge,acc,0.2508532423208191,0.01266819862131543,0 arc_challenge,acc_norm,0.2764505119453925,0.013069662474252425,0 arc_easy,acc,0.5096801346801347,0.010257860554461122,0 arc_easy,acc_norm,0.46296296296296297,0.010231597249131062,0 boolq,acc,0.6155963302752293,0.008508133844703919,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.30465949820788535,,1 copa,acc,0.77,0.042295258468165065,0 hellaswag,acc,0.45429197371041624,0.004968888130290068,0 hellaswag,acc_norm,0.5927106154152559,0.004903254264177628,0 piqa,acc,0.6953210010881393,0.010738889044325161,0 piqa,acc_norm,0.6953210010881393,0.010738889044325161,0 rte,acc,0.5595667870036101,0.02988212336311872,0 sciq,acc,0.827,0.011967214137559941,0 sciq,acc_norm,0.789,0.01290913032104209,0 storycloze_2016,acc,0.6734366648850882,0.010844543793668893,0 winogrande,acc,0.5603788476716653,0.013949649776015696,0