|
task,metric,value,err,version
|
|
anli_r1,acc,0.348,0.015070604603768408,0
|
|
anli_r2,acc,0.355,0.01513949154378053,0
|
|
anli_r3,acc,0.3591666666666667,0.013855141559780354,0
|
|
arc_challenge,acc,0.27986348122866894,0.013119040897725922,0
|
|
arc_challenge,acc_norm,0.30716723549488056,0.013481034054980943,0
|
|
arc_easy,acc,0.6077441077441077,0.010018744689650043,0
|
|
arc_easy,acc_norm,0.6085858585858586,0.010014917532627817,0
|
|
boolq,acc,0.6162079510703364,0.008505584729104966,1
|
|
cb,acc,0.44642857142857145,0.06703189227942398,1
|
|
cb,f1,0.3114219114219114,,1
|
|
copa,acc,0.69,0.04648231987117316,0
|
|
hellaswag,acc,0.4095797649870544,0.00490751210312835,0
|
|
hellaswag,acc_norm,0.5337582154949213,0.004978395540514379,0
|
|
piqa,acc,0.7236126224156693,0.010434162388275615,0
|
|
piqa,acc_norm,0.7328618063112078,0.010323440492612423,0
|
|
rte,acc,0.49458483754512633,0.030094698123239966,0
|
|
sciq,acc,0.915,0.008823426366942324,0
|
|
sciq,acc_norm,0.918,0.00868051561552372,0
|
|
storycloze_2016,acc,0.6819882415820417,0.010769343495248544,0
|
|
winogrande,acc,0.5603788476716653,0.01394964977601569,0
|
|
|