{ "results": { "anli_r1": { "acc": 0.333, "acc_stderr": 0.014910846164229863 }, "anli_r2": { "acc": 0.338, "acc_stderr": 0.014965960710224494 }, "anli_r3": { "acc": 0.3308333333333333, "acc_stderr": 0.013588208070708995 }, "cb": { "acc": 0.48214285714285715, "acc_stderr": 0.0673769750864465, "f1": 0.3082942097026604 }, "copa": { "acc": 0.72, "acc_stderr": 0.04512608598542127 }, "hellaswag": { "acc": 0.4334793865763792, "acc_stderr": 0.004945424771611596, "acc_norm": 0.560744871539534, "acc_norm_stderr": 0.0049528205388318985 }, "rte": { "acc": 0.5487364620938628, "acc_stderr": 0.029953149241808943 }, "winogrande": { "acc": 0.5461720599842147, "acc_stderr": 0.013992441563707068 }, "storycloze_2016": { "acc": 0.6910742918225548, "acc_stderr": 0.010684853966268454 }, "boolq": { "acc": 0.5963302752293578, "acc_stderr": 0.008581220435616816 }, "arc_easy": { "acc": 0.5458754208754208, "acc_stderr": 0.010216507710244106, "acc_norm": 0.49074074074074076, "acc_norm_stderr": 0.010258024147860673 }, "arc_challenge": { "acc": 0.25, "acc_stderr": 0.012653835621466646, "acc_norm": 0.28071672354948807, "acc_norm_stderr": 0.013131238126975578 }, "sciq": { "acc": 0.814, "acc_stderr": 0.012310790208412803, "acc_norm": 0.711, "acc_norm_stderr": 0.014341711358296177 }, "piqa": { "acc": 0.7399347116430903, "acc_stderr": 0.0102348932490613, "acc_norm": 0.7426550598476604, "acc_norm_stderr": 0.01019992106479251 } }, "versions": { "anli_r1": 0, "anli_r2": 0, "anli_r3": 0, "cb": 1, "copa": 0, "hellaswag": 0, "rte": 0, "winogrande": 0, "storycloze_2016": 0, "boolq": 1, "arc_easy": 0, "arc_challenge": 0, "sciq": 0, "piqa": 0 } }