task,metric,value,err,version anli_r1,acc,0.339,0.01497675877162034,0 anli_r2,acc,0.336,0.014944140233795027,0 anli_r3,acc,0.3358333333333333,0.01363926119093288,0 arc_challenge,acc,0.1885665529010239,0.011430897647675803,0 arc_challenge,acc_norm,0.22610921501706485,0.01222420209706328,0 arc_easy,acc,0.43308080808080807,0.010167478013701799,0 arc_easy,acc_norm,0.38173400673400676,0.009968648851839667,0 boolq,acc,0.5944954128440367,0.008587459055441612,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.1940928270042194,,1 copa,acc,0.63,0.04852365870939099,0 hellaswag,acc,0.297450707030472,0.004562022467161891,0 hellaswag,acc_norm,0.32374029077872934,0.004669459891917689,0 piqa,acc,0.6158868335146899,0.011348160741479148,0 piqa,acc_norm,0.6218715995647442,0.011313980666854533,0 rte,acc,0.5234657039711191,0.03006330041190266,0 sciq,acc,0.735,0.013963164754809953,0 sciq,acc_norm,0.668,0.014899597242811476,0 storycloze_2016,acc,0.5905932656333511,0.01137105952719707,0 winogrande,acc,0.5090765588003157,0.014050170094497707,0