ReactSeq / onmt /constants.py
Oopstom's picture
Upload 313 files
c668e80 verified
raw
history blame
880 Bytes
"""Define constant values used across the project."""
class DefaultTokens(object):
    """Namespace of special token strings and separators shared project-wide.

    The class is used purely as a container of class-level constants; it
    defines no methods and is not meant to be instantiated.

    NOTE(review): the per-constant remarks below are inferred from the
    attribute names only -- confirm against the call sites before relying
    on them.
    """

    # Special vocabulary symbols (presumably padding, begin-of-sequence,
    # end-of-sequence, unknown-word and mask tokens respectively).
    PAD = "<blank>"
    BOS = "<s>"
    EOS = "</s>"
    UNK = "<unk>"
    MASK = "<mask>"

    # Placeholder string chosen so that it is unlikely to collide with a
    # real token (the literal itself says as much).
    VOCAB_PAD = "averyunlikelytoken"

    # Characters treated as sentence-final punctuation. This is a mutable
    # list shared by every reader of the class; do not modify it in place.
    SENT_FULL_STOPS = [".", "?", "!"]

    # Field separators. Note that the alignment variant is the same "|||"
    # marker padded with one space on each side.
    PHRASE_TABLE_SEPARATOR = "|||"
    ALIGNMENT_SEPARATOR = " ||| "

    # Placeholder tokens wrapped in the U+2985 / U+2986 parenthesis pair.
    SEP = "⦅newline⦆"
    MASK_BEFORE = "⦅_mask_before_⦆"
class CorpusName(object):
    """Namespace of string identifiers used to name corpora.

    Holds class-level string constants only; it defines no methods and is
    not meant to be instantiated.
    """

    VALID = "valid"
    TRAIN = "train"
    SAMPLE = "sample"
    INFER = "infer"
class CorpusTask(object):
    """Namespace of string identifiers for the task a corpus is used for.

    Holds class-level string constants only; it defines no methods and is
    not meant to be instantiated.
    """

    TRAIN = "train"
    VALID = "valid"
    INFER = "infer"
class SubwordMarker(object):
    """Namespace of marker strings related to subword tokenization.

    Holds class-level string constants only; it defines no methods and is
    not meant to be instantiated.

    NOTE(review): which tokenizer emits each marker is not visible in this
    file -- confirm against the code that consumes these constants.
    """

    # Single-character markers: U+2581 (lower one eighth block) and
    # U+25A0 (black square).
    SPACER = "▁"
    JOINER = "■"

    # Case-markup placeholders wrapped in the U+2985 / U+2986 pair.
    BEGIN_UPPERCASE = "⦅mrk_begin_case_region_U⦆"
    END_UPPERCASE = "⦅mrk_end_case_region_U⦆"
    BEGIN_CASED = "⦅mrk_case_modifier_C⦆"
class ModelTask(object):
    """Namespace of string identifiers for the supported model task types.

    Holds class-level string constants only; it defines no methods and is
    not meant to be instantiated.
    """

    LANGUAGE_MODEL = "lm"
    SEQ2SEQ = "seq2seq"