task,metric,value,err,version anli_r1,acc,0.321,0.01477082181793464,0 anli_r2,acc,0.342,0.01500870618212173,0 anli_r3,acc,0.32083333333333336,0.013480882752851552,0 arc_challenge,acc,0.28071672354948807,0.013131238126975588,0 arc_challenge,acc_norm,0.3191126279863481,0.013621696119173297,0 arc_easy,acc,0.6224747474747475,0.009947227833469432,0 arc_easy,acc_norm,0.601010101010101,0.010048240683798745,0 boolq,acc,0.6125382262996942,0.00852066653613694,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.35968427443837275,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.465345548695479,0.004977782217582457,0 hellaswag,acc_norm,0.6182035451105358,0.004848341560492137,0 piqa,acc,0.7464635473340587,0.010150090834551782,0 piqa,acc_norm,0.7557127312295974,0.010024765172284256,0 rte,acc,0.5595667870036101,0.029882123363118712,0 sciq,acc,0.905,0.009276910103103305,0 sciq,acc_norm,0.902,0.009406619184621235,0 storycloze_2016,acc,0.7242116515232496,0.010334748387645672,0 winogrande,acc,0.5753749013417522,0.013891893150264224,0