task,metric,value,err,version anli_r1,acc,0.325,0.014818724459095526,0 anli_r2,acc,0.335,0.014933117490932577,0 anli_r3,acc,0.3383333333333333,0.013664144006618268,0 arc_challenge,acc,0.3037542662116041,0.013438909184778759,0 arc_challenge,acc_norm,0.3267918088737201,0.013706665975587338,0 arc_easy,acc,0.6321548821548821,0.009894923464455191,0 arc_easy,acc_norm,0.6077441077441077,0.010018744689650043,0 boolq,acc,0.6284403669724771,0.008451598145076598,1 cb,acc,0.21428571428571427,0.055328333517248834,1 cb,f1,0.20694283133307526,,1 copa,acc,0.77,0.042295258468165065,0 hellaswag,acc,0.4749053973312089,0.004983492928102842,0 hellaswag,acc_norm,0.6303525194184425,0.004817227292240292,0 piqa,acc,0.7535364526659413,0.010054810789671824,0 piqa,acc_norm,0.7665941240478781,0.009869247889520998,0 rte,acc,0.5126353790613718,0.030086851767188564,0 sciq,acc,0.911,0.009008893392651523,0 sciq,acc_norm,0.891,0.009859828407037186,0 storycloze_2016,acc,0.7268840192410476,0.010303512765124683,0 winogrande,acc,0.5951065509076559,0.013795927003124934,0