|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.01488827258820394,0
|
|
anli_r2,acc,0.341,0.0149981313484027,0
|
|
anli_r3,acc,0.34,0.0136804957257678,0
|
|
arc_challenge,acc,0.3037542662116041,0.01343890918477875,0
|
|
arc_challenge,acc_norm,0.3319112627986348,0.013760988200880538,0
|
|
arc_easy,acc,0.625,0.009933992677987828,0
|
|
arc_easy,acc_norm,0.5984848484848485,0.010058790020755562,0
|
|
boolq,acc,0.6278287461773701,0.008454434247373908,1
|
|
cb,acc,0.2857142857142857,0.060914490387317256,1
|
|
cb,f1,0.2849772788024592,,1
|
|
copa,acc,0.8,0.04020151261036843,0
|
|
hellaswag,acc,0.47450707030472017,0.00498329157828904,0
|
|
hellaswag,acc_norm,0.6297550288787094,0.004818833521340352,0
|
|
piqa,acc,0.7486398258977149,0.010121156016819257,0
|
|
piqa,acc_norm,0.7665941240478781,0.009869247889520998,0
|
|
rte,acc,0.49458483754512633,0.03009469812323996,0
|
|
sciq,acc,0.901,0.009449248027662751,0
|
|
sciq,acc_norm,0.886,0.010055103435823332,0
|
|
storycloze_2016,acc,0.7177979690005345,0.01040783447964767,0
|
|
winogrande,acc,0.5761641673243884,0.013888492389944511,0
|
|
|