|
task,metric,value,err,version
|
|
anli_r1,acc,0.339,0.01497675877162034,0
|
|
anli_r2,acc,0.335,0.014933117490932573,0
|
|
anli_r3,acc,0.33416666666666667,0.013622434813136783,0
|
|
arc_challenge,acc,0.17918088737201365,0.011207045216615674,0
|
|
arc_challenge,acc_norm,0.2235494880546075,0.012174896631202614,0
|
|
arc_easy,acc,0.4335016835016835,0.010168640625454107,0
|
|
arc_easy,acc_norm,0.3846801346801347,0.009983171707009006,0
|
|
boolq,acc,0.5938837920489297,0.008589510943787407,1
|
|
cb,acc,0.4107142857142857,0.0663363415035954,1
|
|
cb,f1,0.1940928270042194,,1
|
|
copa,acc,0.62,0.04878317312145632,0
|
|
hellaswag,acc,0.2951603266281617,0.004551826272978059,0
|
|
hellaswag,acc_norm,0.3241386178052181,0.004670955399641126,0
|
|
piqa,acc,0.6218715995647442,0.011313980666854535,0
|
|
piqa,acc_norm,0.6267682263329706,0.011284653078254898,0
|
|
rte,acc,0.5234657039711191,0.03006330041190266,0
|
|
sciq,acc,0.732,0.01401329270272948,0
|
|
sciq,acc_norm,0.669,0.01488827258820394,0
|
|
storycloze_2016,acc,0.5873864243719936,0.011384472322969045,0
|
|
winogrande,acc,0.5059194948697711,0.01405150083848581,0
|
|
|