task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095526,0 anli_r2,acc,0.33,0.014876872027456738,0 anli_r3,acc,0.32666666666666666,0.013544340907003663,0 arc_challenge,acc,0.28754266211604096,0.013226719056266129,0 arc_challenge,acc_norm,0.3250853242320819,0.013688147309729122,0 arc_easy,acc,0.6376262626262627,0.009863468202583775,0 arc_easy,acc_norm,0.6338383838383839,0.009885391390947709,0 boolq,acc,0.6253822629969419,0.008465633983431928,1 cb,acc,0.5714285714285714,0.06672848092813058,1 cb,f1,0.42867867867867865,,1 copa,acc,0.77,0.042295258468165065,0 hellaswag,acc,0.4495120493925513,0.004964277999318813,0 hellaswag,acc_norm,0.5978888667596096,0.004893220635011786,0 piqa,acc,0.7383025027203483,0.010255630772708227,0 piqa,acc_norm,0.735038084874864,0.010296557993316047,0 rte,acc,0.5234657039711191,0.030063300411902652,0 sciq,acc,0.926,0.008282064512704159,0 sciq,acc_norm,0.927,0.008230354715244066,0 storycloze_2016,acc,0.6985569214323891,0.010611646032767584,0 winogrande,acc,0.5722178374112076,0.013905134013839951,0