task,metric,value,err,version anli_r1,acc,0.316,0.014709193056057114,0 anli_r2,acc,0.341,0.014998131348402704,0 anli_r3,acc,0.3375,0.013655897185463657,0 arc_challenge,acc,0.2960750853242321,0.013340916085246266,0 arc_challenge,acc_norm,0.32081911262798635,0.013640943091946528,0 arc_easy,acc,0.6342592592592593,0.00988298806941883,0 arc_easy,acc_norm,0.6321548821548821,0.00989492346445519,0 boolq,acc,0.6394495412844037,0.00839805483449795,1 cb,acc,0.5178571428571429,0.06737697508644647,1 cb,f1,0.3271557271557271,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.47540330611431986,0.0049837401452186075,0 hellaswag,acc_norm,0.6369249153555069,0.004799034356969407,0 piqa,acc,0.7546245919477693,0.010039831320422396,0 piqa,acc_norm,0.7616974972796517,0.009940334245876224,0 rte,acc,0.555956678700361,0.029907396333795983,0 sciq,acc,0.92,0.008583336977753655,0 sciq,acc_norm,0.91,0.009054390204866442,0 storycloze_2016,acc,0.7247461250668092,0.01032853840050057,0 winogrande,acc,0.5832675611681136,0.01385625007279632,0