task,metric,value,err,version anli_r1,acc,0.304,0.014553205687950446,0 anli_r2,acc,0.33,0.014876872027456727,0 anli_r3,acc,0.33,0.013579531277800922,0 arc_challenge,acc,0.2960750853242321,0.013340916085246268,0 arc_challenge,acc_norm,0.3216723549488055,0.013650488084494164,0 arc_easy,acc,0.6321548821548821,0.009894923464455191,0 arc_easy,acc_norm,0.6275252525252525,0.009920469215736012,0 boolq,acc,0.6388379204892967,0.00840115419524237,1 cb,acc,0.5178571428571429,0.06737697508644648,1 cb,f1,0.34887334887334887,,1 copa,acc,0.78,0.04163331998932262,0 hellaswag,acc,0.4751045608444533,0.004983592410934173,0 hellaswag,acc_norm,0.6331408086038638,0.0048096267236268486,0 piqa,acc,0.7595212187159956,0.009971345364651073,0 piqa,acc_norm,0.7676822633297062,0.009853201384168243,0 rte,acc,0.5487364620938628,0.029953149241808943,0 sciq,acc,0.911,0.009008893392651526,0 sciq,acc_norm,0.903,0.009363689373248113,0 storycloze_2016,acc,0.72367717797969,0.010340939873166822,0 winogrande,acc,0.5943172849250198,0.013800206336014201,0