task,metric,value,err,version anli_r1,acc,0.329,0.014865395385928366,0 anli_r2,acc,0.329,0.014865395385928364,0 anli_r3,acc,0.3458333333333333,0.013736245342311012,0 arc_challenge,acc,0.2627986348122867,0.012862523175351333,0 arc_challenge,acc_norm,0.28242320819112626,0.013155456884097222,0 arc_easy,acc,0.5652356902356902,0.010172083670402784,0 arc_easy,acc_norm,0.5130471380471381,0.01025628992505844,0 boolq,acc,0.6241590214067279,0.008471147248160112,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.45429197371041624,0.00496888813029007,0 hellaswag,acc_norm,0.5936068512248556,0.004901558132335521,0 piqa,acc,0.7372143634385201,0.010269354068140767,0 piqa,acc_norm,0.7459194776931447,0.010157271999135051,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.826,0.011994493230973428,0 sciq,acc_norm,0.726,0.014111099288259588,0 storycloze_2016,acc,0.7108498129342598,0.010484068799942079,0 winogrande,acc,0.5619573796369376,0.013944181296470804,0