# Reference-only list of languages; the graph does not use it.
languages:
  - 'ca'
  - 'es'
  - 'fr'
  - 'it'
  - 'pt'
  - 'ro'

# The model has 512 positional embeddings, but it was only trained on
# sequences of up to 256 tokens.
max_length: 256

# Labels the model may predict in front of each token
pre_labels:
  - '<NULL>'
  - '¿'

# Labels the model may predict following each token
post_labels:
  - '<NULL>'
  - '<ACRONYM>'
  - '.'
  - ','
  - '?'