|
task,metric,value,err,version
|
|
anli_r1,acc,0.322,0.014782913600996683,0
|
|
anli_r2,acc,0.312,0.014658474370509014,0
|
|
anli_r3,acc,0.31666666666666665,0.01343407866082738,0
|
|
arc_challenge,acc,0.21928327645051193,0.01209124578761573,0
|
|
arc_challenge,acc_norm,0.25,0.012653835621466646,0
|
|
arc_easy,acc,0.5328282828282829,0.010237645778853872,0
|
|
arc_easy,acc_norm,0.5063131313131313,0.01025896566804444,0
|
|
boolq,acc,0.5749235474006116,0.00864631615937318,1
|
|
cb,acc,0.5,0.06741998624632421,1
|
|
cb,f1,0.3554421768707483,,1
|
|
copa,acc,0.71,0.045604802157206845,0
|
|
hellaswag,acc,0.36456881099382593,0.004803253812881047,0
|
|
hellaswag,acc_norm,0.4435371439952201,0.004957863944093114,0
|
|
piqa,acc,0.7007616974972797,0.010684130673134581,0
|
|
piqa,acc_norm,0.7007616974972797,0.010684130673134581,0
|
|
rte,acc,0.5451263537906137,0.029973636495415252,0
|
|
sciq,acc,0.862,0.0109121526325044,0
|
|
sciq,acc_norm,0.848,0.01135891830347529,0
|
|
storycloze_2016,acc,0.6317477284874399,0.011153823258531741,0
|
|
winogrande,acc,0.5280189423835833,0.014030404213405788,0
|
|
|