task,metric,value,err,version anli_r1,acc,0.347,0.015060472031706622,0 anli_r2,acc,0.346,0.015050266127564448,0 anli_r3,acc,0.335,0.013630871843821476,0 arc_challenge,acc,0.2935153583617747,0.013307250444941122,0 arc_challenge,acc_norm,0.31143344709897613,0.013532472099850942,0 arc_easy,acc,0.6174242424242424,0.009972837790531477,0 arc_easy,acc_norm,0.6102693602693603,0.010007169391797055,0 boolq,acc,0.5654434250764526,0.008669824006668013,1 cb,acc,0.35714285714285715,0.06460957383809221,1 cb,f1,0.2986564996368918,,1 copa,acc,0.8,0.04020151261036845,0 hellaswag,acc,0.38259310894244175,0.004850268986903357,0 hellaswag,acc_norm,0.48078072097191793,0.004986093791041665,0 piqa,acc,0.7573449401523396,0.010002002569708698,0 piqa,acc_norm,0.7665941240478781,0.00986924788952099,0 rte,acc,0.5595667870036101,0.029882123363118726,0 sciq,acc,0.84,0.011598902298689009,0 sciq,acc_norm,0.818,0.012207580637662144,0 storycloze_2016,acc,0.6483164083377873,0.011042025772682543,0 winogrande,acc,0.580110497237569,0.013870943986310396,0