task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270336,0 anli_r2,acc,0.326,0.014830507204541028,0 anli_r3,acc,0.3491666666666667,0.01376707539507725,0 arc_challenge,acc,0.29436860068259385,0.013318528460539422,0 arc_challenge,acc_norm,0.30631399317406144,0.013470584417276511,0 arc_easy,acc,0.6178451178451179,0.009970747281292436,0 arc_easy,acc_norm,0.5824915824915825,0.010119187377776038,0 boolq,acc,0.5935779816513761,0.008590531708882184,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3421052631578947,,1 copa,acc,0.8,0.04020151261036845,0 hellaswag,acc,0.47470623381796456,0.004983392650570958,0 hellaswag,acc_norm,0.6215893248356901,0.00483999574560232,0 piqa,acc,0.7513601741022851,0.010084511234296859,0 piqa,acc_norm,0.7568008705114254,0.010009611953858914,0 rte,acc,0.5631768953068592,0.02985524739031495,0 sciq,acc,0.902,0.009406619184621243,0 sciq,acc_norm,0.864,0.01084535023047299,0 storycloze_2016,acc,0.7167290219134153,0.010419760409155363,0 winogrande,acc,0.574585635359116,0.013895257666646382,0