task,metric,value,err,version anli_r1,acc,0.339,0.014976758771620342,0 anli_r2,acc,0.336,0.014944140233795027,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.31143344709897613,0.013532472099850945,0 arc_challenge,acc_norm,0.3412969283276451,0.013855831287497719,0 arc_easy,acc,0.6734006734006734,0.00962304703826764,0 arc_easy,acc_norm,0.5892255892255892,0.010095101349348653,0 boolq,acc,0.6351681957186545,0.00841944098496366,1 cb,acc,0.39285714285714285,0.0658538889806635,1 cb,f1,0.19047619047619047,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.5356502688707429,0.004977081808179436,0 hellaswag,acc_norm,0.7113124875522804,0.004522262128177007,0 piqa,acc,0.780739934711643,0.00965335746360531,0 piqa,acc_norm,0.7894450489662677,0.009512378081238743,0 rte,acc,0.628158844765343,0.029091018492217447,0 sciq,acc,0.896,0.009658016218524298,0 sciq,acc_norm,0.823,0.012075463420375061,0 storycloze_2016,acc,0.7589524318546232,0.009890946490576938,0 winogrande,acc,0.648776637726914,0.013415981370545126,0