task,metric,value,err,version anli_r1,acc,0.314,0.01468399195108795,0 anli_r2,acc,0.337,0.014955087918653609,0 anli_r3,acc,0.35,0.013774667009018558,0 arc_challenge,acc,0.29180887372013653,0.013284525292403506,0 arc_challenge,acc_norm,0.31313993174061433,0.013552671543623494,0 arc_easy,acc,0.6043771043771043,0.010033741393430983,0 arc_easy,acc_norm,0.5925925925925926,0.010082326627832861,0 boolq,acc,0.617737003058104,0.008499149690449273,1 cb,acc,0.42857142857142855,0.06672848092813057,1 cb,f1,0.31174851513834567,,1 copa,acc,0.75,0.04351941398892446,0 hellaswag,acc,0.46395140410276836,0.004976796060456438,0 hellaswag,acc_norm,0.6093407687711612,0.0048690101522807505,0 piqa,acc,0.7453754080522307,0.01016443223706049,0 piqa,acc_norm,0.7404787812840044,0.010227939888173923,0 rte,acc,0.592057761732852,0.029581952519606197,0 sciq,acc,0.876,0.010427498872343961,0 sciq,acc_norm,0.871,0.010605256784796565,0 storycloze_2016,acc,0.7044361304115446,0.010551778839373784,0 winogrande,acc,0.5572217837411207,0.013960157350784978,0