|
task,metric,value,err,version
|
|
anli_r1,acc,0.331,0.014888272588203926,0
|
|
anli_r2,acc,0.339,0.01497675877162035,0
|
|
anli_r3,acc,0.34,0.013680495725767794,0
|
|
arc_challenge,acc,0.26109215017064846,0.012835523909473848,0
|
|
arc_challenge,acc_norm,0.2858361774744027,0.013203196088537367,0
|
|
arc_easy,acc,0.5845959595959596,0.010111869494911517,0
|
|
arc_easy,acc_norm,0.5555555555555556,0.01019625483869168,0
|
|
boolq,acc,0.48807339449541287,0.008742566760633421,1
|
|
cb,acc,0.3392857142857143,0.06384226561930825,1
|
|
cb,f1,0.31372797744890774,,1
|
|
copa,acc,0.79,0.040936018074033256,0
|
|
hellaswag,acc,0.4364668392750448,0.0049493353568818635,0
|
|
hellaswag,acc_norm,0.5734913363871739,0.004935587729948866,0
|
|
piqa,acc,0.7404787812840044,0.01022793988817392,0
|
|
piqa,acc_norm,0.7529923830250272,0.010062268140772625,0
|
|
rte,acc,0.48014440433212996,0.0300727231673172,0
|
|
sciq,acc,0.857,0.01107581480856704,0
|
|
sciq,acc_norm,0.842,0.011539894677559564,0
|
|
storycloze_2016,acc,0.7172634954569749,0.010413806486121271,0
|
|
winogrande,acc,0.5627466456195738,0.013941393310695922,0
|
|
|