task,metric,value,err,version anli_r1,acc,0.327,0.014842213153411249,0 anli_r2,acc,0.324,0.014806864733738854,0 anli_r3,acc,0.3175,0.013443538681348054,0 arc_challenge,acc,0.2696245733788396,0.01296804068686915,0 arc_challenge,acc_norm,0.2883959044368601,0.013238394422428175,0 arc_easy,acc,0.5951178451178452,0.010072423960395701,0 arc_easy,acc_norm,0.5803872053872053,0.010126315840891536,0 boolq,acc,0.5636085626911315,0.008674000467432073,1 cb,acc,0.5178571428571429,0.06737697508644648,1 cb,f1,0.33564993564993567,,1 copa,acc,0.76,0.04292346959909283,0 hellaswag,acc,0.43158733320055764,0.004942853459371548,0 hellaswag,acc_norm,0.5655247958573989,0.004946748608271348,0 piqa,acc,0.7328618063112078,0.010323440492612437,0 piqa,acc_norm,0.7470076169749728,0.010142888698862453,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.903,0.009363689373248111,0 sciq,acc_norm,0.901,0.009449248027662747,0 storycloze_2016,acc,0.6889363976483164,0.010705164869803167,0 winogrande,acc,0.5564325177584846,0.0139626949076204,0