task,metric,value,err,version anli_r1,acc,0.351,0.015100563798316407,0 anli_r2,acc,0.333,0.014910846164229859,0 anli_r3,acc,0.3441666666666667,0.013720551062295755,0 arc_challenge,acc,0.2636518771331058,0.012875929151297046,0 arc_challenge,acc_norm,0.3046075085324232,0.01344952210993249,0 arc_easy,acc,0.5968013468013468,0.0100656685767948,0 arc_easy,acc_norm,0.5555555555555556,0.01019625483869168,0 boolq,acc,0.6174311926605505,0.008500443818876161,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.30142857142857143,,1 copa,acc,0.8,0.040201512610368445,0 hellaswag,acc,0.4660426209918343,0.004978260641742204,0 hellaswag,acc_norm,0.6097390957976498,0.00486811759848194,0 piqa,acc,0.7453754080522307,0.01016443223706048,0 piqa,acc_norm,0.7551686615886833,0.0100323091055688,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.841,0.01156947936827129,0 sciq,acc_norm,0.798,0.012702651587655137,0 storycloze_2016,acc,0.7177979690005345,0.010407834479647673,0 winogrande,acc,0.585635359116022,0.013844846232268563,0