|
task,metric,value,err,version
|
|
anli_r1,acc,0.323,0.014794927843348635,0
|
|
anli_r2,acc,0.332,0.014899597242811482,0
|
|
anli_r3,acc,0.33,0.013579531277800923,0
|
|
arc_challenge,acc,0.2235494880546075,0.012174896631202605,0
|
|
arc_challenge,acc_norm,0.2525597269624573,0.012696728980207706,0
|
|
arc_easy,acc,0.5294612794612794,0.010241957728409686,0
|
|
arc_easy,acc_norm,0.476010101010101,0.010247967392742691,0
|
|
boolq,acc,0.5788990825688073,0.008635491562221343,1
|
|
cb,acc,0.25,0.058387420812114225,1
|
|
cb,f1,0.2191358024691358,,1
|
|
copa,acc,0.69,0.04648231987117316,0
|
|
hellaswag,acc,0.3650667197769369,0.004804649197163699,0
|
|
hellaswag,acc_norm,0.4427404899422426,0.004956953917781311,0
|
|
piqa,acc,0.7034820457018498,0.010656078922661153,0
|
|
piqa,acc_norm,0.7067464635473341,0.010621818421101931,0
|
|
rte,acc,0.5451263537906137,0.029973636495415252,0
|
|
sciq,acc,0.811,0.012386784588117707,0
|
|
sciq,acc_norm,0.726,0.014111099288259587,0
|
|
storycloze_2016,acc,0.6467129877071085,0.011053474766125627,0
|
|
winogrande,acc,0.5185477505919495,0.014042813708888378,0
|
|
|