task,metric,value,err,version anli_r1,acc,0.346,0.015050266127564438,0 anli_r2,acc,0.364,0.015222868840522024,0 anli_r3,acc,0.3325,0.013605417345710526,0 arc_challenge,acc,0.2832764505119454,0.013167478735134575,0 arc_challenge,acc_norm,0.3037542662116041,0.01343890918477876,0 arc_easy,acc,0.5989057239057239,0.010057051106534372,0 arc_easy,acc_norm,0.5812289562289562,0.010123487160167819,0 boolq,acc,0.5981651376146789,0.008574857171671134,1 cb,acc,0.44642857142857145,0.067031892279424,1 cb,f1,0.31977105885280577,,1 copa,acc,0.8,0.04020151261036844,0 hellaswag,acc,0.44851623182632944,0.004963259311700562,0 hellaswag,acc_norm,0.5903206532563234,0.004907694727935689,0 piqa,acc,0.7426550598476604,0.01019992106479251,0 piqa,acc_norm,0.7535364526659413,0.010054810789671811,0 rte,acc,0.5523465703971119,0.02993107036293953,0 sciq,acc,0.899,0.009533618929341002,0 sciq,acc_norm,0.872,0.010570133761108658,0 storycloze_2016,acc,0.706574024585783,0.010529489334744471,0 winogrande,acc,0.5643251775848461,0.01393570973961571,0