task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270334,0 anli_r2,acc,0.321,0.014770821817934649,0 anli_r3,acc,0.3491666666666667,0.013767075395077249,0 arc_challenge,acc,0.3250853242320819,0.013688147309729124,0 arc_challenge,acc_norm,0.35665529010238906,0.013998056902620199,0 arc_easy,acc,0.6742424242424242,0.009616642976885964,0 arc_easy,acc_norm,0.6405723905723906,0.009845958893373752,0 boolq,acc,0.6489296636085627,0.00834811495726361,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.3013448230839535,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.5251941844254132,0.00498344288867777,0 hellaswag,acc_norm,0.6989643497311293,0.0045777070250313644,0 piqa,acc,0.7747551686615887,0.009746643471032145,0 piqa,acc_norm,0.779651795429815,0.00967053545685313,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.916,0.008776162089491127,0 sciq,acc_norm,0.897,0.009616833339695792,0 storycloze_2016,acc,0.7504008551576697,0.010008002459430844,0 winogrande,acc,0.6108918705603789,0.013702520871485945,0