|
task,metric,value,err,version
|
|
anli_r1,acc,0.339,0.014976758771620347,0
|
|
anli_r2,acc,0.322,0.014782913600996664,0
|
|
anli_r3,acc,0.35333333333333333,0.013804572162314925,0
|
|
arc_challenge,acc,0.28498293515358364,0.013191348179838793,0
|
|
arc_challenge,acc_norm,0.310580204778157,0.01352229209805305,0
|
|
arc_easy,acc,0.6195286195286195,0.00996230599205857,0
|
|
arc_easy,acc_norm,0.6136363636363636,0.009991296778159615,0
|
|
boolq,acc,0.5290519877675841,0.00873028052845153,1
|
|
cb,acc,0.375,0.06527912098338669,1
|
|
cb,f1,0.25089094796863864,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.4671380203146783,0.004978992721242829,0
|
|
hellaswag,acc_norm,0.6250746863174667,0.004831142570475509,0
|
|
piqa,acc,0.7453754080522307,0.01016443223706049,0
|
|
piqa,acc_norm,0.7595212187159956,0.009971345364651066,0
|
|
rte,acc,0.5018050541516246,0.030096267148976626,0
|
|
sciq,acc,0.906,0.009233052000787736,0
|
|
sciq,acc_norm,0.894,0.009739551265785133,0
|
|
storycloze_2016,acc,0.7252805986103688,0.010322309878339502,0
|
|
winogrande,acc,0.5832675611681136,0.01385625007279632,0
|
|
|