task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928369,0 anli_r2,acc,0.356,0.015149042659306625,0 anli_r3,acc,0.3358333333333333,0.013639261190932887,0 arc_challenge,acc,0.2986348122866894,0.013374078615068756,0 arc_challenge,acc_norm,0.3310580204778157,0.013752062419817834,0 arc_easy,acc,0.6372053872053872,0.009865936757013936,0 arc_easy,acc_norm,0.6325757575757576,0.009892552616211551,0 boolq,acc,0.5899082568807339,0.008602512053254416,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.3196248196248196,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.4556861183031269,0.004970145708188008,0 hellaswag,acc_norm,0.6085441147181836,0.004870785036708275,0 piqa,acc,0.7459194776931447,0.010157271999135043,0 piqa,acc_norm,0.750816104461371,0.01009188277012021,0 rte,acc,0.4657039711191336,0.030025579819366422,0 sciq,acc,0.927,0.008230354715244054,0 sciq,acc_norm,0.934,0.007855297938697587,0 storycloze_2016,acc,0.7194013896312133,0.010389809647288821,0 winogrande,acc,0.5895816890292028,0.013825107120035868,0