|
task,metric,value,err,version
|
|
anli_r1,acc,0.367,0.01524937846417175,0
|
|
anli_r2,acc,0.325,0.014818724459095527,0
|
|
anli_r3,acc,0.3425,0.013704669762934722,0
|
|
arc_challenge,acc,0.28498293515358364,0.013191348179838795,0
|
|
arc_challenge,acc_norm,0.3191126279863481,0.013621696119173304,0
|
|
arc_easy,acc,0.6056397306397306,0.010028176038393007,0
|
|
arc_easy,acc_norm,0.5812289562289562,0.010123487160167819,0
|
|
boolq,acc,0.6192660550458715,0.008492625561656217,1
|
|
cb,acc,0.42857142857142855,0.06672848092813058,1
|
|
cb,f1,0.3487396784006953,,1
|
|
copa,acc,0.71,0.04560480215720684,0
|
|
hellaswag,acc,0.46265684126667994,0.004975845335086618,0
|
|
hellaswag,acc_norm,0.6078470424218283,0.004872326888655505,0
|
|
piqa,acc,0.7453754080522307,0.01016443223706048,0
|
|
piqa,acc_norm,0.7453754080522307,0.010164432237060492,0
|
|
rte,acc,0.5379061371841155,0.030009848912529117,0
|
|
sciq,acc,0.865,0.010811655372416051,0
|
|
sciq,acc_norm,0.851,0.01126614068463217,0
|
|
storycloze_2016,acc,0.7113842864778194,0.010478311785642947,0
|
|
winogrande,acc,0.5572217837411207,0.013960157350784985,0
|
|
|