task,metric,value,err,version anli_r1,acc,0.315,0.014696631960792506,0 anli_r2,acc,0.307,0.014593284892852628,0 anli_r3,acc,0.3375,0.013655897185463657,0 arc_challenge,acc,0.22696245733788395,0.012240491536132866,0 arc_challenge,acc_norm,0.2645051194539249,0.012889272949313368,0 arc_easy,acc,0.5269360269360269,0.010244884740620108,0 arc_easy,acc_norm,0.4962121212121212,0.010259489101351845,0 boolq,acc,0.5152905198776758,0.008740964832224338,1 cb,acc,0.48214285714285715,0.06737697508644648,1 cb,f1,0.3421052631578947,,1 copa,acc,0.66,0.04760952285695237,0 hellaswag,acc,0.36168094005178253,0.00479505103791772,0 hellaswag,acc_norm,0.44363672575184226,0.004957976789260528,0 piqa,acc,0.690424374319913,0.010786656752183345,0 piqa,acc_norm,0.6936887921653971,0.010754970032367321,0 rte,acc,0.5270758122743683,0.0300523034631437,0 sciq,acc,0.87,0.010640169792499344,0 sciq,acc_norm,0.861,0.010945263761042965,0 storycloze_2016,acc,0.632816675574559,0.011147041781368648,0 winogrande,acc,0.5248618784530387,0.01403510288362775,0