task,metric,value,err,version anli_r1,acc,0.316,0.014709193056057128,0 anli_r2,acc,0.333,0.014910846164229863,0 anli_r3,acc,0.35,0.013774667009018554,0 arc_challenge,acc,0.2815699658703072,0.013143376735009024,0 arc_challenge,acc_norm,0.30802047781569963,0.013491429517292038,0 arc_easy,acc,0.627104377104377,0.009922743197129257,0 arc_easy,acc_norm,0.5955387205387206,0.010070746648278795,0 boolq,acc,0.5896024464831804,0.008603488048617521,1 cb,acc,0.30357142857142855,0.06199938655510754,1 cb,f1,0.24554767533490937,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.4780920135431189,0.004984989320648131,0 hellaswag,acc_norm,0.6307508464449313,0.004816152074023083,0 piqa,acc,0.7486398258977149,0.010121156016819257,0 piqa,acc_norm,0.7611534276387377,0.009948120385337485,0 rte,acc,0.5812274368231047,0.029696661081234824,0 sciq,acc,0.901,0.009449248027662765,0 sciq,acc_norm,0.884,0.010131468138756997,0 storycloze_2016,acc,0.7188669160876536,0.010395836091628103,0 winogrande,acc,0.5840568271507498,0.013852485356798252,0