task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653605,0 anli_r2,acc,0.339,0.014976758771620345,0 anli_r3,acc,0.33,0.013579531277800923,0 arc_challenge,acc,0.2901023890784983,0.01326157367752076,0 arc_challenge,acc_norm,0.3174061433447099,0.01360223908803817,0 arc_easy,acc,0.63510101010101,0.009878157021155649,0 arc_easy,acc_norm,0.625,0.009933992677987828,0 boolq,acc,0.618348623853211,0.008496550741178263,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.32751039809863336,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.4508066122286397,0.004965572246803864,0 hellaswag,acc_norm,0.5974905397331209,0.004894012555642632,0 piqa,acc,0.7470076169749728,0.01014288869886246,0 piqa,acc_norm,0.7486398258977149,0.01012115601681924,0 rte,acc,0.5018050541516246,0.030096267148976626,0 sciq,acc,0.905,0.009276910103103317,0 sciq,acc_norm,0.906,0.009233052000787735,0 storycloze_2016,acc,0.7001603420630679,0.010595525174558598,0 winogrande,acc,0.5611681136543015,0.013946933444507032,0