task,metric,value,err,version anli_r1,acc,0.313,0.014671272822977883,0 anli_r2,acc,0.325,0.014818724459095529,0 anli_r3,acc,0.33666666666666667,0.013647602942406394,0 arc_challenge,acc,0.2858361774744027,0.013203196088537369,0 arc_challenge,acc_norm,0.32081911262798635,0.013640943091946524,0 arc_easy,acc,0.6405723905723906,0.009845958893373764,0 arc_easy,acc_norm,0.6376262626262627,0.009863468202583773,0 boolq,acc,0.6204892966360857,0.008487341975756834,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3995062282572102,,1 copa,acc,0.74,0.0440844002276808,0 hellaswag,acc,0.4510057757418841,0.004965768348628053,0 hellaswag,acc_norm,0.5970922127066322,0.004894801119898596,0 piqa,acc,0.7442872687704026,0.010178690109459862,0 piqa,acc_norm,0.7519042437431991,0.010077118315574703,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.923,0.008434580140240651,0 sciq,acc_norm,0.925,0.00833333333333335,0 storycloze_2016,acc,0.7124532335649385,0.010466744473098368,0 winogrande,acc,0.569060773480663,0.013917796623335966,0