|
task,metric,value,err,version
|
|
anli_r1,acc,0.304,0.014553205687950446,0
|
|
anli_r2,acc,0.33,0.014876872027456727,0
|
|
anli_r3,acc,0.33,0.013579531277800922,0
|
|
arc_challenge,acc,0.2960750853242321,0.013340916085246268,0
|
|
arc_challenge,acc_norm,0.3216723549488055,0.013650488084494164,0
|
|
arc_easy,acc,0.6321548821548821,0.009894923464455191,0
|
|
arc_easy,acc_norm,0.6275252525252525,0.009920469215736012,0
|
|
boolq,acc,0.6388379204892967,0.00840115419524237,1
|
|
cb,acc,0.5178571428571429,0.06737697508644648,1
|
|
cb,f1,0.34887334887334887,,1
|
|
copa,acc,0.78,0.04163331998932262,0
|
|
hellaswag,acc,0.4751045608444533,0.004983592410934173,0
|
|
hellaswag,acc_norm,0.6331408086038638,0.0048096267236268486,0
|
|
piqa,acc,0.7595212187159956,0.009971345364651073,0
|
|
piqa,acc_norm,0.7676822633297062,0.009853201384168243,0
|
|
rte,acc,0.5487364620938628,0.029953149241808943,0
|
|
sciq,acc,0.911,0.009008893392651526,0
|
|
sciq,acc_norm,0.903,0.009363689373248113,0
|
|
storycloze_2016,acc,0.72367717797969,0.010340939873166822,0
|
|
winogrande,acc,0.5943172849250198,0.013800206336014201,0
|
|
|