task,metric,value,err,version anli_r1,acc,0.346,0.015050266127564448,0 anli_r2,acc,0.328,0.01485384248727033,0 anli_r3,acc,0.3258333333333333,0.01353542204341746,0 arc_challenge,acc,0.2295221843003413,0.012288926760890788,0 arc_challenge,acc_norm,0.2551194539249147,0.012739038695202104,0 arc_easy,acc,0.5538720538720538,0.010200057828765008,0 arc_easy,acc_norm,0.5336700336700336,0.010236494647406476,0 boolq,acc,0.5299694189602446,0.0087293318183149,1 cb,acc,0.5357142857142857,0.06724777654937658,1 cb,f1,0.37160083671711575,,1 copa,acc,0.71,0.045604802157206845,0 hellaswag,acc,0.3690499900418243,0.0048156131443854,0 hellaswag,acc_norm,0.4470225054769966,0.004961693567208813,0 piqa,acc,0.7040261153427638,0.010650414317148119,0 piqa,acc_norm,0.6996735582154516,0.010695225308183143,0 rte,acc,0.49097472924187724,0.030091559826331334,0 sciq,acc,0.901,0.009449248027662742,0 sciq,acc_norm,0.899,0.009533618929340988,0 storycloze_2016,acc,0.6397648316408338,0.011101519668493523,0 winogrande,acc,0.5280189423835833,0.014030404213405788,0