task,metric,value,err,version anli_r1,acc,0.309,0.01461960097720649,0 anli_r2,acc,0.336,0.014944140233795018,0 anli_r3,acc,0.34833333333333333,0.013759437498874075,0 arc_challenge,acc,0.2815699658703072,0.013143376735009022,0 arc_challenge,acc_norm,0.3054607508532423,0.013460080478002498,0 arc_easy,acc,0.5753367003367004,0.010142653687480416,0 arc_easy,acc_norm,0.5513468013468014,0.010205540414612871,0 boolq,acc,0.617737003058104,0.008499149690449273,1 cb,acc,0.44642857142857145,0.06703189227942398,1 cb,f1,0.3456203829338158,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.46036646086436966,0.004974080638364265,0 hellaswag,acc_norm,0.6097390957976498,0.004868117598481941,0 piqa,acc,0.7377584330794341,0.01026250256517245,0 piqa,acc_norm,0.7404787812840044,0.010227939888173923,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.848,0.01135891830347528,0 sciq,acc_norm,0.845,0.011450157470799475,0 storycloze_2016,acc,0.692143238909674,0.010674598158758186,0 winogrande,acc,0.5627466456195738,0.013941393310695924,0