|
task,metric,value,err,version
|
|
anli_r1,acc,0.326,0.014830507204541037,0
|
|
anli_r2,acc,0.337,0.014955087918653607,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136774,0
|
|
arc_challenge,acc,0.28071672354948807,0.013131238126975576,0
|
|
arc_challenge,acc_norm,0.3037542662116041,0.013438909184778766,0
|
|
arc_easy,acc,0.5993265993265994,0.010055304474255573,0
|
|
arc_easy,acc_norm,0.5694444444444444,0.010160345396860082,0
|
|
boolq,acc,0.5752293577981651,0.008645503833361106,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.26622479977906655,,1
|
|
copa,acc,0.81,0.039427724440366234,0
|
|
hellaswag,acc,0.4629555865365465,0.004976067726432562,0
|
|
hellaswag,acc_norm,0.609838677554272,0.004867893927258165,0
|
|
piqa,acc,0.7437431991294886,0.01018578783156506,0
|
|
piqa,acc_norm,0.7524483133841132,0.010069703966857116,0
|
|
rte,acc,0.5270758122743683,0.0300523034631437,0
|
|
sciq,acc,0.844,0.011480235006122363,0
|
|
sciq,acc_norm,0.794,0.012795613612786548,0
|
|
storycloze_2016,acc,0.7145911277391769,0.010443395884062115,0
|
|
winogrande,acc,0.5824782951854776,0.013859978264440246,0
|
|
|