task,metric,value,err,version anli_r1,acc,0.331,0.014888272588203945,0 anli_r2,acc,0.35,0.015090650341444231,0 anli_r3,acc,0.33416666666666667,0.013622434813136769,0 arc_challenge,acc,0.28924914675767915,0.013250012579393443,0 arc_challenge,acc_norm,0.30887372013651876,0.013501770929344003,0 arc_easy,acc,0.6102693602693603,0.01000716939179705,0 arc_easy,acc_norm,0.5993265993265994,0.010055304474255582,0 boolq,acc,0.5519877675840978,0.008697655510897228,1 cb,acc,0.375,0.06527912098338669,1 cb,f1,0.26182156999767064,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.468034256124278,0.0049795737655758555,0 hellaswag,acc_norm,0.6188010356502689,0.00484688692976345,0 piqa,acc,0.7529923830250272,0.010062268140772622,0 piqa,acc_norm,0.7584330794341676,0.00998671800180446,0 rte,acc,0.5342960288808665,0.03002557981936643,0 sciq,acc,0.883,0.010169287802713329,0 sciq,acc_norm,0.865,0.010811655372416053,0 storycloze_2016,acc,0.7151256012827365,0.01043751398661172,0 winogrande,acc,0.5769534333070244,0.013885055359056472,0