|
task,metric,value,err,version
|
|
anli_r1,acc,0.334,0.014922019523732963,0
|
|
anli_r2,acc,0.333,0.014910846164229868,0
|
|
anli_r3,acc,0.3491666666666667,0.013767075395077247,0
|
|
arc_challenge,acc,0.29266211604095566,0.013295916103619411,0
|
|
arc_challenge,acc_norm,0.3225255972696246,0.01365998089427737,0
|
|
arc_easy,acc,0.6212121212121212,0.009953737656542035,0
|
|
arc_easy,acc_norm,0.5833333333333334,0.010116282977781254,0
|
|
boolq,acc,0.599388379204893,0.008570545612096374,1
|
|
cb,acc,0.35714285714285715,0.0646095738380922,1
|
|
cb,f1,0.23179160021265285,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.48088030272854015,0.004986131919673967,0
|
|
hellaswag,acc_norm,0.630053774148576,0.004818031396138917,0
|
|
piqa,acc,0.7529923830250272,0.01006226814077262,0
|
|
piqa,acc_norm,0.7627856365614799,0.009924694933586374,0
|
|
rte,acc,0.5667870036101083,0.029826764082138277,0
|
|
sciq,acc,0.887,0.010016552866696848,0
|
|
sciq,acc_norm,0.876,0.01042749887234396,0
|
|
storycloze_2016,acc,0.7204703367183325,0.01037770209970486,0
|
|
winogrande,acc,0.5951065509076559,0.013795927003124939,0
|
|
|