|
task,metric,value,err,version
|
|
anli_r1,acc,0.329,0.014865395385928362,0
|
|
anli_r2,acc,0.314,0.014683991951087962,0
|
|
anli_r3,acc,0.345,0.01372842153945487,0
|
|
arc_challenge,acc,0.31399317406143346,0.013562691224726281,0
|
|
arc_challenge,acc_norm,0.3216723549488055,0.013650488084494162,0
|
|
arc_easy,acc,0.6325757575757576,0.009892552616211555,0
|
|
arc_easy,acc_norm,0.6376262626262627,0.009863468202583773,0
|
|
boolq,acc,0.5767584097859327,0.008641391399113598,1
|
|
cb,acc,0.39285714285714285,0.0658538889806635,1
|
|
cb,f1,0.33259154725720075,,1
|
|
copa,acc,0.71,0.045604802157206845,0
|
|
hellaswag,acc,0.45717984465245964,0.004971449552787172,0
|
|
hellaswag,acc_norm,0.6106353316072496,0.00486609688094144,0
|
|
piqa,acc,0.7442872687704026,0.010178690109459862,0
|
|
piqa,acc_norm,0.7562568008705114,0.010017199471500609,0
|
|
rte,acc,0.5090252707581228,0.030091559826331334,0
|
|
sciq,acc,0.926,0.008282064512704159,0
|
|
sciq,acc_norm,0.941,0.007454835650406725,0
|
|
storycloze_2016,acc,0.7226082308925709,0.010353267472010768,0
|
|
winogrande,acc,0.5974743488555643,0.013782866831703044,0
|
|
|