task,metric,value,err,version anli_r1,acc,0.33,0.014876872027456732,0 anli_r2,acc,0.318,0.0147340793093119,0 anli_r3,acc,0.3458333333333333,0.013736245342311014,0 arc_challenge,acc,0.24914675767918087,0.012639407111926442,0 arc_challenge,acc_norm,0.2832764505119454,0.013167478735134575,0 arc_easy,acc,0.5606060606060606,0.010184134315437668,0 arc_easy,acc_norm,0.5286195286195287,0.010242962617927199,0 boolq,acc,0.5311926605504587,0.008728020822889253,1 cb,acc,0.42857142857142855,0.06672848092813057,1 cb,f1,0.29090447154471544,,1 copa,acc,0.7,0.046056618647183814,0 hellaswag,acc,0.3601872137024497,0.004790734683704592,0 hellaswag,acc_norm,0.4427404899422426,0.004956953917781312,0 piqa,acc,0.7154515778019587,0.010527218464130622,0 piqa,acc_norm,0.719804134929271,0.010478122015577098,0 rte,acc,0.5667870036101083,0.02982676408213827,0 sciq,acc,0.814,0.0123107902084128,0 sciq,acc_norm,0.788,0.012931481864938029,0 storycloze_2016,acc,0.6023516835916622,0.011317586826972788,0 winogrande,acc,0.5359116022099447,0.014016193433958312,0