task,metric,value,err,version anli_r1,acc,0.322,0.014782913600996653,0 anli_r2,acc,0.349,0.015080663991563098,0 anli_r3,acc,0.3233333333333333,0.013508372867300219,0 arc_challenge,acc,0.29266211604095566,0.01329591610361943,0 arc_challenge,acc_norm,0.3302047781569966,0.013743085603760427,0 arc_easy,acc,0.6245791245791246,0.0099362185271143,0 arc_easy,acc_norm,0.6052188552188552,0.01003003893588359,0 boolq,acc,0.6128440366972477,0.008519429207594416,1 cb,acc,0.42857142857142855,0.06672848092813058,1 cb,f1,0.2588235294117647,,1 copa,acc,0.82,0.038612291966536955,0 hellaswag,acc,0.4731129257120096,0.004982561815214124,0 hellaswag,acc_norm,0.6232822146982673,0.00483572890373141,0 piqa,acc,0.7453754080522307,0.01016443223706049,0 piqa,acc_norm,0.7611534276387377,0.009948120385337484,0 rte,acc,0.48375451263537905,0.030080573208738064,0 sciq,acc,0.914,0.008870325962594766,0 sciq,acc_norm,0.889,0.009938701010583726,0 storycloze_2016,acc,0.7172634954569749,0.01041380648612127,0 winogrande,acc,0.5974743488555643,0.01378286683170305,0