task,metric,value,err,version anli_r1,acc,0.328,0.014853842487270336,0 anli_r2,acc,0.33,0.014876872027456738,0 anli_r3,acc,0.35083333333333333,0.013782212417178204,0 arc_challenge,acc,0.26791808873720135,0.012942030195136426,0 arc_challenge,acc_norm,0.2858361774744027,0.013203196088537369,0 arc_easy,acc,0.5761784511784511,0.010140006095213603,0 arc_easy,acc_norm,0.5374579124579124,0.010230952104570798,0 boolq,acc,0.5737003058103975,0.008649531625805671,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.34491725768321513,,1 copa,acc,0.73,0.04461960433384741,0 hellaswag,acc,0.4425413264289982,0.004956724392646535,0 hellaswag,acc_norm,0.5784704242182832,0.00492794806148606,0 piqa,acc,0.7437431991294886,0.010185787831565056,0 piqa,acc_norm,0.7383025027203483,0.010255630772708229,0 rte,acc,0.5018050541516246,0.030096267148976626,0 sciq,acc,0.836,0.011715000693181316,0 sciq,acc_norm,0.788,0.01293148186493804,0 storycloze_2016,acc,0.6937466595403528,0.010659088460112754,0 winogrande,acc,0.5493291239147593,0.01398392886904024,0