task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203926,0 anli_r2,acc,0.339,0.01497675877162035,0 anli_r3,acc,0.34,0.013680495725767794,0 arc_challenge,acc,0.26109215017064846,0.012835523909473848,0 arc_challenge,acc_norm,0.2858361774744027,0.013203196088537367,0 arc_easy,acc,0.5845959595959596,0.010111869494911517,0 arc_easy,acc_norm,0.5555555555555556,0.01019625483869168,0 boolq,acc,0.48807339449541287,0.008742566760633421,1 cb,acc,0.3392857142857143,0.06384226561930825,1 cb,f1,0.31372797744890774,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4364668392750448,0.0049493353568818635,0 hellaswag,acc_norm,0.5734913363871739,0.004935587729948866,0 piqa,acc,0.7404787812840044,0.01022793988817392,0 piqa,acc_norm,0.7529923830250272,0.010062268140772625,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.857,0.01107581480856704,0 sciq,acc_norm,0.842,0.011539894677559564,0 storycloze_2016,acc,0.7172634954569749,0.010413806486121271,0 winogrande,acc,0.5627466456195738,0.013941393310695922,0