|
task,metric,value,err,version
|
|
anli_r1,acc,0.327,0.014842213153411249,0
|
|
anli_r2,acc,0.324,0.014806864733738854,0
|
|
anli_r3,acc,0.3175,0.013443538681348054,0
|
|
arc_challenge,acc,0.2696245733788396,0.01296804068686915,0
|
|
arc_challenge,acc_norm,0.2883959044368601,0.013238394422428175,0
|
|
arc_easy,acc,0.5951178451178452,0.010072423960395701,0
|
|
arc_easy,acc_norm,0.5803872053872053,0.010126315840891536,0
|
|
boolq,acc,0.5636085626911315,0.008674000467432073,1
|
|
cb,acc,0.5178571428571429,0.06737697508644648,1
|
|
cb,f1,0.33564993564993567,,1
|
|
copa,acc,0.76,0.04292346959909283,0
|
|
hellaswag,acc,0.43158733320055764,0.004942853459371548,0
|
|
hellaswag,acc_norm,0.5655247958573989,0.004946748608271348,0
|
|
piqa,acc,0.7328618063112078,0.010323440492612437,0
|
|
piqa,acc_norm,0.7470076169749728,0.010142888698862453,0
|
|
rte,acc,0.5270758122743683,0.030052303463143706,0
|
|
sciq,acc,0.903,0.009363689373248111,0
|
|
sciq,acc_norm,0.901,0.009449248027662747,0
|
|
storycloze_2016,acc,0.6889363976483164,0.010705164869803167,0
|
|
winogrande,acc,0.5564325177584846,0.0139626949076204,0
|
|
|