|
task,metric,value,err,version
|
|
anli_r1,acc,0.327,0.014842213153411242,0
|
|
anli_r2,acc,0.338,0.014965960710224487,0
|
|
anli_r3,acc,0.3333333333333333,0.0136139500102256,0
|
|
arc_challenge,acc,0.26706484641638223,0.01292893319649636,0
|
|
arc_challenge,acc_norm,0.30631399317406144,0.013470584417276513,0
|
|
arc_easy,acc,0.5892255892255892,0.01009510134934865,0
|
|
arc_easy,acc_norm,0.5361952861952862,0.01023286555034674,0
|
|
boolq,acc,0.6186544342507645,0.008495245917063564,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.34164884770729387,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.46634136626170086,0.00497846269096693,0
|
|
hellaswag,acc_norm,0.6101374228241386,0.004867221634461266,0
|
|
piqa,acc,0.7459194776931447,0.010157271999135041,0
|
|
piqa,acc_norm,0.7524483133841132,0.010069703966857114,0
|
|
rte,acc,0.5776173285198556,0.029731622646495887,0
|
|
sciq,acc,0.847,0.011389500459665532,0
|
|
sciq,acc_norm,0.777,0.013169830843425673,0
|
|
storycloze_2016,acc,0.7167290219134153,0.010419760409155363,0
|
|
winogrande,acc,0.5895816890292028,0.013825107120035861,0
|
|
|