task,metric,value,err,version anli_r1,acc,0.336,0.014944140233795023,0 anli_r2,acc,0.333,0.014910846164229864,0 anli_r3,acc,0.3516666666666667,0.013789711695404785,0 arc_challenge,acc,0.27559726962457337,0.01305716965576184,0 arc_challenge,acc_norm,0.30631399317406144,0.013470584417276511,0 arc_easy,acc,0.6018518518518519,0.010044662374653398,0 arc_easy,acc_norm,0.5214646464646465,0.010250325159456652,0 boolq,acc,0.6100917431192661,0.008530437972862622,1 cb,acc,0.2857142857142857,0.06091449038731724,1 cb,f1,0.24845800389121164,,1 copa,acc,0.78,0.04163331998932261,0 hellaswag,acc,0.47918741286596295,0.004985456752161002,0 hellaswag,acc_norm,0.6287592113124876,0.004821492994082102,0 piqa,acc,0.750816104461371,0.010091882770120216,0 piqa,acc_norm,0.7616974972796517,0.009940334245876219,0 rte,acc,0.5306859205776173,0.03003973059219781,0 sciq,acc,0.852,0.01123486636423524,0 sciq,acc_norm,0.768,0.01335493745228157,0 storycloze_2016,acc,0.7087119187600214,0.010506919924163614,0 winogrande,acc,0.5816890292028414,0.013863669961195904,0