task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811475,0 anli_r2,acc,0.333,0.014910846164229864,0 anli_r3,acc,0.34,0.013680495725767797,0 arc_challenge,acc,0.295221843003413,0.01332975029338232,0 arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0 arc_easy,acc,0.6195286195286195,0.009962305992058577,0 arc_easy,acc_norm,0.5959595959595959,0.01006906164954955,0 boolq,acc,0.6180428134556575,0.00849785199842719,1 cb,acc,0.35714285714285715,0.06460957383809218,1 cb,f1,0.2528248587570622,,1 copa,acc,0.77,0.04229525846816506,0 hellaswag,acc,0.46932881896036643,0.004980384575535383,0 hellaswag,acc_norm,0.6172077275443139,0.0048507486878599185,0 piqa,acc,0.7464635473340587,0.010150090834551794,0 piqa,acc_norm,0.7573449401523396,0.010002002569708688,0 rte,acc,0.4693140794223827,0.03003973059219781,0 sciq,acc,0.896,0.009658016218524301,0 sciq,acc_norm,0.871,0.010605256784796586,0 storycloze_2016,acc,0.7183324425440941,0.010401844358587665,0 winogrande,acc,0.5927387529597474,0.013808654122417862,0