# English model
languages:
  - "en"

# Model has 512 positional embeddings, but was only trained on sequences of up
# to 256 tokens, so inputs are capped at the trained length.
max_length: 256

# pre_labels is not actually used, but is included for pipeline generality.
# NOTE(review): the order of entries in both label lists presumably maps to
# the model's output class indices -- confirm before reordering or inserting.
pre_labels:
  - "<NULL>"
  - "¿"

post_labels:
  - "<NULL>"
  - "<ACRONYM>"
  - "."
  - ","
  - "?"