|
task,metric,value,err,version
|
|
anli_r1,acc,0.311,0.014645596385722694,0
|
|
anli_r2,acc,0.299,0.014484778521220477,0
|
|
anli_r3,acc,0.335,0.013630871843821474,0
|
|
arc_challenge,acc,0.28924914675767915,0.013250012579393443,0
|
|
arc_challenge,acc_norm,0.318259385665529,0.013611993916971453,0
|
|
arc_easy,acc,0.6401515151515151,0.009848484848484843,0
|
|
arc_easy,acc_norm,0.6346801346801347,0.009880576614806924,0
|
|
boolq,acc,0.6241590214067279,0.008471147248160114,1
|
|
cb,acc,0.5178571428571429,0.06737697508644647,1
|
|
cb,f1,0.43401043401043404,,1
|
|
copa,acc,0.82,0.038612291966536955,0
|
|
hellaswag,acc,0.45140410276837284,0.004966158142645416,0
|
|
hellaswag,acc_norm,0.601274646484764,0.0048863535635718415,0
|
|
piqa,acc,0.7453754080522307,0.010164432237060487,0
|
|
piqa,acc_norm,0.7448313384113167,0.010171571592521834,0
|
|
rte,acc,0.49097472924187724,0.030091559826331334,0
|
|
sciq,acc,0.927,0.008230354715244055,0
|
|
sciq,acc_norm,0.928,0.008178195576218681,0
|
|
storycloze_2016,acc,0.7097808658471406,0.010495529690730063,0
|
|
winogrande,acc,0.590370955011839,0.013821049109655491,0
|
|
|