task,metric,value,err,version anli_r1,acc,0.317,0.01472167543888022,0 anli_r2,acc,0.337,0.014955087918653605,0 anli_r3,acc,0.33416666666666667,0.013622434813136783,0 arc_challenge,acc,0.3310580204778157,0.013752062419817836,0 arc_challenge,acc_norm,0.36689419795221845,0.014084133118104292,0 arc_easy,acc,0.6813973063973064,0.009560775507673366,0 arc_easy,acc_norm,0.6641414141414141,0.009691180932083508,0 boolq,acc,0.65565749235474,0.008310485054782981,1 cb,acc,0.4642857142857143,0.06724777654937658,1 cb,f1,0.38713450292397655,,1 copa,acc,0.83,0.03775251680686371,0 hellaswag,acc,0.5299741087432782,0.004980807231136743,0 hellaswag,acc_norm,0.7079267078271261,0.004537865171414025,0 piqa,acc,0.7845484221980413,0.009592463115658117,0 piqa,acc_norm,0.7899891186071817,0.009503353305818578,0 rte,acc,0.5884476534296029,0.029621832222417196,0 sciq,acc,0.939,0.007572076091557426,0 sciq,acc_norm,0.926,0.008282064512704156,0 storycloze_2016,acc,0.7659005879208979,0.009791868211495304,0 winogrande,acc,0.6353591160220995,0.013527746622429837,0