task,metric,value,err,version anli_r1,acc,0.335,0.014933117490932575,0 anli_r2,acc,0.333,0.01491084616422986,0 anli_r3,acc,0.33416666666666667,0.013622434813136781,0 arc_challenge,acc,0.2619453924914676,0.012849054826858115,0 arc_challenge,acc_norm,0.29180887372013653,0.013284525292403508,0 arc_easy,acc,0.5888047138047138,0.010096663811817681,0 arc_easy,acc_norm,0.5197811447811448,0.010251751199542738,0 boolq,acc,0.5984709480122324,0.008573784490094752,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.21956970232832299,,1 copa,acc,0.68,0.046882617226215034,0 hellaswag,acc,0.4700258912567218,0.00498080723113675,0 hellaswag,acc_norm,0.6167098187612029,0.00485194417067125,0 piqa,acc,0.7519042437431991,0.010077118315574719,0 piqa,acc_norm,0.7568008705114254,0.01000961195385892,0 rte,acc,0.5270758122743683,0.030052303463143706,0 sciq,acc,0.845,0.01145015747079947,0 sciq,acc_norm,0.766,0.01339490288966001,0 storycloze_2016,acc,0.7167290219134153,0.010419760409155363,0 winogrande,acc,0.595895816890292,0.013791610664670858,0