task,metric,value,err,version anli_r1,acc,0.348,0.015070604603768408,0 anli_r2,acc,0.344,0.015029633724408948,0 anli_r3,acc,0.3383333333333333,0.013664144006618271,0 arc_challenge,acc,0.28924914675767915,0.013250012579393443,0 arc_challenge,acc_norm,0.31313993174061433,0.01355267154362349,0 arc_easy,acc,0.6043771043771043,0.010033741393430986,0 arc_easy,acc_norm,0.5677609427609428,0.010165130379698746,0 boolq,acc,0.6058103975535168,0.008546995661233635,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.21777777777777776,,1 copa,acc,0.71,0.04560480215720684,0 hellaswag,acc,0.46285600477992433,0.004975993795562031,0 hellaswag,acc_norm,0.6014738099980084,0.004885942040894556,0 piqa,acc,0.7388465723612623,0.01024873864993558,0 piqa,acc_norm,0.7399347116430903,0.010234893249061282,0 rte,acc,0.5415162454873647,0.029992535385373314,0 sciq,acc,0.856,0.01110798754893915,0 sciq,acc_norm,0.829,0.011912216456264614,0 storycloze_2016,acc,0.694815606627472,0.010648664383985658,0 winogrande,acc,0.5753749013417522,0.01389189315026423,0