task,metric,value,err,version anli_r1,acc,0.295,0.014428554438445512,0 anli_r2,acc,0.312,0.014658474370509007,0 anli_r3,acc,0.33416666666666667,0.013622434813136769,0 arc_challenge,acc,0.3250853242320819,0.013688147309729122,0 arc_challenge,acc_norm,0.34812286689419797,0.013921008595179333,0 arc_easy,acc,0.6805555555555556,0.009567482017268095,0 arc_easy,acc_norm,0.6565656565656566,0.00974381736896003,0 boolq,acc,0.6626911314984709,0.008269171495741622,1 cb,acc,0.21428571428571427,0.055328333517248834,1 cb,f1,0.1865942028985507,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.5261900019916351,0.0049829315659459545,0 hellaswag,acc_norm,0.702549292969528,0.004562022467161891,0 piqa,acc,0.7709466811751904,0.009804509865175504,0 piqa,acc_norm,0.7856365614798694,0.009574842136050964,0 rte,acc,0.5342960288808665,0.030025579819366426,0 sciq,acc,0.933,0.007910345983177549,0 sciq,acc_norm,0.92,0.008583336977753655,0 storycloze_2016,acc,0.7536076964190273,0.009964727533753548,0 winogrande,acc,0.6250986582478295,0.013605544523788,0