task,metric,value,err,version anli_r1,acc,0.311,0.014645596385722694,0 anli_r2,acc,0.299,0.014484778521220477,0 anli_r3,acc,0.335,0.013630871843821474,0 arc_challenge,acc,0.28924914675767915,0.013250012579393443,0 arc_challenge,acc_norm,0.318259385665529,0.013611993916971453,0 arc_easy,acc,0.6401515151515151,0.009848484848484843,0 arc_easy,acc_norm,0.6346801346801347,0.009880576614806924,0 boolq,acc,0.6241590214067279,0.008471147248160114,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.43401043401043404,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.45140410276837284,0.004966158142645416,0 hellaswag,acc_norm,0.601274646484764,0.0048863535635718415,0 piqa,acc,0.7453754080522307,0.010164432237060487,0 piqa,acc_norm,0.7448313384113167,0.010171571592521834,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.927,0.008230354715244055,0 sciq,acc_norm,0.928,0.008178195576218681,0 storycloze_2016,acc,0.7097808658471406,0.010495529690730063,0 winogrande,acc,0.590370955011839,0.013821049109655491,0