task,metric,value,err,version anli_r1,acc,0.332,0.014899597242811483,0 anli_r2,acc,0.344,0.015029633724408947,0 anli_r3,acc,0.3375,0.013655897185463653,0 arc_challenge,acc,0.22184300341296928,0.012141659068147884,0 arc_challenge,acc_norm,0.23720136518771331,0.01243039982926085,0 arc_easy,acc,0.4663299663299663,0.010236494647406476,0 arc_easy,acc_norm,0.4591750841750842,0.010225526906982606,0 boolq,acc,0.43730886850152906,0.008676043429497427,1 cb,acc,0.4107142857142857,0.0663363415035954,1 cb,f1,0.285919540229885,,1 copa,acc,0.68,0.04688261722621505,0 hellaswag,acc,0.39912368054172476,0.004887174080003037,0 hellaswag,acc_norm,0.4940250946026688,0.0049894251333779055,0 piqa,acc,0.6996735582154516,0.010695225308183138,0 piqa,acc_norm,0.7018498367791077,0.01067296411400829,0 rte,acc,0.48014440433212996,0.0300727231673172,0 sciq,acc,0.737,0.013929286594259741,0 sciq,acc_norm,0.679,0.014770821817934649,0 storycloze_2016,acc,0.6451095670764297,0.011064787659904119,0 winogrande,acc,0.526440410418311,0.01403282387440722,0