task,metric,value,err,version anli_r1,acc,0.336,0.014944140233795027,0 anli_r2,acc,0.329,0.014865395385928364,0 anli_r3,acc,0.34833333333333333,0.013759437498874072,0 arc_challenge,acc,0.2713310580204778,0.01299380772754579,0 arc_challenge,acc_norm,0.295221843003413,0.013329750293382318,0 arc_easy,acc,0.5909090909090909,0.010088775152615786,0 arc_easy,acc_norm,0.5311447811447811,0.010239860250021745,0 boolq,acc,0.6201834862385321,0.008488668235778617,1 cb,acc,0.4642857142857143,0.0672477765493766,1 cb,f1,0.3271604938271605,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.47410874327823144,0.004983087049281741,0 hellaswag,acc_norm,0.619896434973113,0.004844199910173022,0 piqa,acc,0.7595212187159956,0.009971345364651078,0 piqa,acc_norm,0.764417845484222,0.009901067586473883,0 rte,acc,0.5451263537906137,0.029973636495415252,0 sciq,acc,0.843,0.01151014697923019,0 sciq,acc_norm,0.755,0.013607356839598123,0 storycloze_2016,acc,0.726349545697488,0.0103097970944971,0 winogrande,acc,0.585635359116022,0.013844846232268563,0