task,metric,value,err,version anli_r1,acc,0.346,0.01505026612756444,0 anli_r2,acc,0.338,0.01496596071022448,0 anli_r3,acc,0.3516666666666667,0.013789711695404789,0 arc_challenge,acc,0.2909556313993174,0.01327307786590759,0 arc_challenge,acc_norm,0.318259385665529,0.013611993916971453,0 arc_easy,acc,0.6308922558922558,0.009901987410242738,0 arc_easy,acc_norm,0.617003367003367,0.00997492038453648,0 boolq,acc,0.6269113149847095,0.008458661252058382,1 cb,acc,0.48214285714285715,0.0673769750864465,1 cb,f1,0.3115193264446996,,1 copa,acc,0.79,0.040936018074033256,0 hellaswag,acc,0.4707229635530771,0.0049812201358823294,0 hellaswag,acc_norm,0.6292571200955985,0.004820166002253066,0 piqa,acc,0.7562568008705114,0.010017199471500617,0 piqa,acc_norm,0.766050054406964,0.00987723689513744,0 rte,acc,0.5884476534296029,0.029621832222417196,0 sciq,acc,0.911,0.009008893392651526,0 sciq,acc_norm,0.905,0.0092769101031033,0 storycloze_2016,acc,0.7242116515232496,0.010334748387645675,0 winogrande,acc,0.5722178374112076,0.013905134013839953,0