|
task,metric,value,err,version
|
|
anli_r1,acc,0.361,0.015195720118175124,0
|
|
anli_r2,acc,0.332,0.014899597242811475,0
|
|
anli_r3,acc,0.3441666666666667,0.013720551062295755,0
|
|
arc_challenge,acc,0.30631399317406144,0.013470584417276513,0
|
|
arc_challenge,acc_norm,0.32337883959044367,0.013669421630012122,0
|
|
arc_easy,acc,0.6245791245791246,0.0099362185271143,0
|
|
arc_easy,acc_norm,0.6199494949494949,0.009960175831493131,0
|
|
boolq,acc,0.6152905198776758,0.008509403073229692,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.3336203597397627,,1
|
|
copa,acc,0.77,0.04229525846816506,0
|
|
hellaswag,acc,0.4676359290977893,0.004979317515432522,0
|
|
hellaswag,acc_norm,0.6258713403704441,0.004829081532826523,0
|
|
piqa,acc,0.7529923830250272,0.010062268140772629,0
|
|
piqa,acc_norm,0.7540805223068553,0.010047331865625184,0
|
|
rte,acc,0.5054151624548736,0.030094698123239966,0
|
|
sciq,acc,0.913,0.008916866630745913,0
|
|
sciq,acc_norm,0.904,0.009320454434783217,0
|
|
storycloze_2016,acc,0.721004810261892,0.010371620932652793,0
|
|
winogrande,acc,0.5816890292028414,0.013863669961195908,0
|
|
|