task,metric,value,err,version anli_r1,acc,0.356,0.01514904265930663,0 anli_r2,acc,0.363,0.015213890444671285,0 anli_r3,acc,0.3625,0.01388303787422552,0 arc_challenge,acc,0.2363481228668942,0.012414960524301836,0 arc_challenge,acc_norm,0.26706484641638223,0.012928933196496357,0 arc_easy,acc,0.4852693602693603,0.010255329977562098,0 arc_easy,acc_norm,0.47769360269360267,0.01024956840455565,0 boolq,acc,0.5339449541284403,0.008724878548525217,1 cb,acc,0.5,0.06741998624632421,1 cb,f1,0.3261261261261261,,1 copa,acc,0.65,0.047937248544110196,0 hellaswag,acc,0.39075881298546106,0.0048692327581033226,0 hellaswag,acc_norm,0.4880501892053376,0.004988356146499007,0 piqa,acc,0.6996735582154516,0.010695225308183133,0 piqa,acc_norm,0.6926006528835691,0.010765602506939061,0 rte,acc,0.5090252707581228,0.030091559826331334,0 sciq,acc,0.763,0.01345407046257795,0 sciq,acc_norm,0.733,0.013996674851796275,0 storycloze_2016,acc,0.6461785141635489,0.011057260832171067,0 winogrande,acc,0.5153906866614049,0.014045826789783663,0