task,metric,value,err,version anli_r1,acc,0.334,0.014922019523732967,0 anli_r2,acc,0.325,0.014818724459095527,0 anli_r3,acc,0.3441666666666667,0.013720551062295756,0 arc_challenge,acc,0.26023890784982934,0.012821930225112568,0 arc_challenge,acc_norm,0.2790102389078498,0.01310678488360133,0 arc_easy,acc,0.5660774410774411,0.010169795770462111,0 arc_easy,acc_norm,0.5084175084175084,0.010258329515226459,0 boolq,acc,0.591131498470948,0.008598573693259106,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.46415056761601275,0.0049769393332400776,0 hellaswag,acc_norm,0.6052579167496515,0.0048779626449918555,0 piqa,acc,0.7404787812840044,0.01022793988817392,0 piqa,acc_norm,0.7431991294885746,0.01019286480227804,0 rte,acc,0.5270758122743683,0.0300523034631437,0 sciq,acc,0.829,0.011912216456264607,0 sciq,acc_norm,0.751,0.013681600278702301,0 storycloze_2016,acc,0.7151256012827365,0.010437513986611718,0 winogrande,acc,0.5824782951854776,0.013859978264440251,0