|
task,metric,value,err,version
|
|
anli_r1,acc,0.314,0.01468399195108795,0
|
|
anli_r2,acc,0.337,0.014955087918653609,0
|
|
anli_r3,acc,0.35,0.013774667009018558,0
|
|
arc_challenge,acc,0.29180887372013653,0.013284525292403506,0
|
|
arc_challenge,acc_norm,0.31313993174061433,0.013552671543623494,0
|
|
arc_easy,acc,0.6043771043771043,0.010033741393430983,0
|
|
arc_easy,acc_norm,0.5925925925925926,0.010082326627832861,0
|
|
boolq,acc,0.617737003058104,0.008499149690449273,1
|
|
cb,acc,0.42857142857142855,0.06672848092813057,1
|
|
cb,f1,0.31174851513834567,,1
|
|
copa,acc,0.75,0.04351941398892446,0
|
|
hellaswag,acc,0.46395140410276836,0.004976796060456438,0
|
|
hellaswag,acc_norm,0.6093407687711612,0.0048690101522807505,0
|
|
piqa,acc,0.7453754080522307,0.01016443223706049,0
|
|
piqa,acc_norm,0.7404787812840044,0.010227939888173923,0
|
|
rte,acc,0.592057761732852,0.029581952519606197,0
|
|
sciq,acc,0.876,0.010427498872343961,0
|
|
sciq,acc_norm,0.871,0.010605256784796565,0
|
|
storycloze_2016,acc,0.7044361304115446,0.010551778839373784,0
|
|
winogrande,acc,0.5572217837411207,0.013960157350784978,0
|
|
|