task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203938,0 anli_r2,acc,0.324,0.014806864733738863,0 anli_r3,acc,0.3416666666666667,0.013696658778002519,0 arc_challenge,acc,0.3370307167235495,0.013813476652902265,0 arc_challenge,acc_norm,0.35665529010238906,0.013998056902620203,0 arc_easy,acc,0.686026936026936,0.00952324533521551,0 arc_easy,acc_norm,0.6628787878787878,0.009700146509130068,0 boolq,acc,0.6467889908256881,0.008359705247064296,1 cb,acc,0.14285714285714285,0.047184161362558305,1 cb,f1,0.1381769825918762,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.5295757817167894,0.004981044370530809,0 hellaswag,acc_norm,0.7048396733718383,0.0045518262729780596,0 piqa,acc,0.7742110990206746,0.009754980670917315,0 piqa,acc_norm,0.7867247007616975,0.00955712122586134,0 rte,acc,0.49458483754512633,0.030094698123239966,0 sciq,acc,0.938,0.0076298239962803065,0 sciq,acc_norm,0.918,0.00868051561552373,0 storycloze_2016,acc,0.7514698022447889,0.009993659448666372,0 winogrande,acc,0.611681136543015,0.013697456658457232,0