task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996664,0 anli_r2,acc,0.313,0.014671272822977892,0 anli_r3,acc,0.3491666666666667,0.013767075395077249,0 arc_challenge,acc,0.2832764505119454,0.013167478735134575,0 arc_challenge,acc_norm,0.3165529010238908,0.013592431519068079,0 arc_easy,acc,0.6022727272727273,0.010042861602178061,0 arc_easy,acc_norm,0.5803872053872053,0.010126315840891539,0 boolq,acc,0.6174311926605505,0.008500443818876165,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.2745098039215686,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4613622784305915,0.004974860878464439,0 hellaswag,acc_norm,0.6013742282413862,0.004886147907627404,0 piqa,acc,0.7475516866158868,0.01013566554736236,0 piqa,acc_norm,0.7383025027203483,0.010255630772708229,0 rte,acc,0.5487364620938628,0.029953149241808943,0 sciq,acc,0.867,0.010743669132397332,0 sciq,acc_norm,0.85,0.011297239823409296,0 storycloze_2016,acc,0.7033671833244255,0.010562819181563226,0 winogrande,acc,0.5509076558800315,0.013979459389140844,0