# Languages are for reference only, and aren't used in the graph.
# NOTE(review): entries look like ISO 639-1 two-letter codes - confirm.
languages: [
  "ca", "es", "fr", "it", "pt", "ro"
]
# Maximum input length, in tokens.
# The model has 512 positional embeddings, but was only trained on sequences
# of up to 256 tokens, so the limit is set to the trained length.
max_length: 256
# Labels that may be predicted before each token.
# NOTE(review): "<NULL>" presumably means "no label"; the list order
# presumably matches the model's output class indices - confirm before
# reordering or inserting entries.
pre_labels: [
  "<NULL>",
  "¿",
]
# Labels that may be predicted after each token.
# NOTE(review): "<NULL>" presumably means "no label" and "<ACRONYM>"
# presumably marks acronym-style punctuation; the list order presumably
# matches the model's output class indices - confirm before reordering or
# inserting entries.
post_labels: [
  "<NULL>",
  "<ACRONYM>",
  ".",
  ",",
  "?",
]