|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.014888272588203936,0
|
|
anli_r2,acc,0.336,0.014944140233795027,0
|
|
anli_r3,acc,0.3425,0.013704669762934727,0
|
|
arc_challenge,acc,0.2781569965870307,0.013094469919538816,0
|
|
arc_challenge,acc_norm,0.29436860068259385,0.013318528460539426,0
|
|
arc_easy,acc,0.609006734006734,0.010012992232540633,0
|
|
arc_easy,acc_norm,0.5593434343434344,0.010187264635711991,0
|
|
boolq,acc,0.5892966360856269,0.008604460608471413,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.21956970232832299,,1
|
|
copa,acc,0.74,0.044084400227680794,0
|
|
hellaswag,acc,0.4480183230432185,0.004962742426849887,0
|
|
hellaswag,acc_norm,0.5839474208325035,0.0049189510191838875,0
|
|
piqa,acc,0.7442872687704026,0.010178690109459857,0
|
|
piqa,acc_norm,0.7546245919477693,0.010039831320422386,0
|
|
rte,acc,0.5631768953068592,0.029855247390314945,0
|
|
sciq,acc,0.865,0.010811655372416053,0
|
|
sciq,acc_norm,0.793,0.012818553557843983,0
|
|
storycloze_2016,acc,0.6916087653661144,0.010679734445487797,0
|
|
winogrande,acc,0.5730071033938438,0.01390187807257506,0
|
|
|