|
task,metric,value,err,version
|
|
anli_r1,acc,0.328,0.014853842487270334,0
|
|
anli_r2,acc,0.321,0.01477082181793464,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136783,0
|
|
arc_challenge,acc,0.3455631399317406,0.01389693846114569,0
|
|
arc_challenge,acc_norm,0.3583617747440273,0.014012883334859859,0
|
|
arc_easy,acc,0.6914983164983165,0.009477472342978122,0
|
|
arc_easy,acc_norm,0.6734006734006734,0.009623047038267656,0
|
|
boolq,acc,0.6584097859327217,0.008294560677768487,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.30424242424242426,,1
|
|
copa,acc,0.82,0.038612291966536955,0
|
|
hellaswag,acc,0.528779127663812,0.004981509099276353,0
|
|
hellaswag,acc_norm,0.7054371639115714,0.004549143750428458,0
|
|
piqa,acc,0.7763873775843307,0.009721489519176289,0
|
|
piqa,acc_norm,0.7872687704026116,0.009548223123047346,0
|
|
rte,acc,0.5992779783393501,0.029497229237163147,0
|
|
sciq,acc,0.931,0.008018934050315157,0
|
|
sciq,acc_norm,0.923,0.008434580140240634,0
|
|
storycloze_2016,acc,0.7594869053981828,0.009883453084862687,0
|
|
winogrande,acc,0.6464088397790055,0.013436541262599955,0
|
|
|