task,metric,value,err,version anli_r1,acc,0.337,0.014955087918653603,0 anli_r2,acc,0.33,0.014876872027456734,0 anli_r3,acc,0.3383333333333333,0.01366414400661827,0 arc_challenge,acc,0.2721843003412969,0.013006600406423706,0 arc_challenge,acc_norm,0.30119453924914674,0.013406741767847632,0 arc_easy,acc,0.5774410774410774,0.01013597822298108,0 arc_easy,acc_norm,0.5290404040404041,0.010242463826395614,0 boolq,acc,0.6076452599388379,0.008539983838167734,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.26694444444444443,,1 copa,acc,0.73,0.04461960433384741,0 hellaswag,acc,0.46285600477992433,0.004975993795562032,0 hellaswag,acc_norm,0.6013742282413862,0.004886147907627405,0 piqa,acc,0.7437431991294886,0.010185787831565062,0 piqa,acc_norm,0.749727965179543,0.010106561880089768,0 rte,acc,0.5740072202166066,0.029764956741777645,0 sciq,acc,0.818,0.012207580637662157,0 sciq,acc_norm,0.737,0.013929286594259734,0 storycloze_2016,acc,0.709246392303581,0.010501233625213081,0 winogrande,acc,0.5816890292028414,0.013863669961195892,0