task,metric,value,err,version anli_r1,acc,0.311,0.014645596385722692,0 anli_r2,acc,0.357,0.015158521721486773,0 anli_r3,acc,0.3233333333333333,0.01350837286730022,0 arc_challenge,acc,0.2645051194539249,0.012889272949313366,0 arc_challenge,acc_norm,0.2883959044368601,0.01323839442242817,0 arc_easy,acc,0.5925925925925926,0.010082326627832865,0 arc_easy,acc_norm,0.5656565656565656,0.010170943451269421,0 boolq,acc,0.5776758409785933,0.008638883260317736,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.4390740542278427,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.4457279426409082,0.004960299952519407,0 hellaswag,acc_norm,0.5819557857000598,0.004922294797766662,0 piqa,acc,0.7328618063112078,0.010323440492612445,0 piqa,acc_norm,0.7437431991294886,0.010185787831565058,0 rte,acc,0.5884476534296029,0.029621832222417196,0 sciq,acc,0.863,0.010878848714333316,0 sciq,acc_norm,0.821,0.012128730605719113,0 storycloze_2016,acc,0.6926777124532336,0.010669445081866662,0 winogrande,acc,0.5367008681925809,0.01401457845884326,0