{ "version": "1.0", "truncation": { "direction": "Right", "max_length": 100, "strategy": "LongestFirst", "stride": 0 }, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": " " }, "content": "" }, { "type": "Replace", "pattern": { "String": "ſ" }, "content": "s" }, { "type": "Replace", "pattern": { "String": "aͤ" }, "content": "ä" }, { "type": "Replace", "pattern": { "String": "oͤ" }, "content": "ö" }, { "type": "Replace", "pattern": { "String": "uͤ" }, "content": "ü" } ] }, "pre_tokenizer": null, "post_processor": { "type": "RobertaProcessing", "sep": [ "", 2 ], "cls": [ "", 1 ], "trim_offsets": true, "add_prefix_space": true }, "decoder": { "type": "BPEDecoder", "suffix": "" }, "model": { "type": "BPE", "dropout": null, "unk_token": "", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "byte_fallback": false, "ignore_merges": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "!": 4, "&": 5, "'": 6, "(": 7, ")": 8, "*": 9, ",": 10, "-": 11, ".": 12, "0": 13, "1": 14, "2": 15, "3": 16, "4": 17, "5": 18, "6": 19, "7": 20, "8": 21, "9": 22, ":": 23, ";": 24, "=": 25, ">": 26, "?": 27, "A": 28, "B": 29, "C": 30, "D": 31, "E": 32, "F": 33, "G": 34, "H": 35, "I": 36, "J": 37, "K": 38, "L": 39, "M": 40, "N": 41, "O": 42, "P": 43, "Q": 44, "R": 45, "S": 46, "T": 47, "U": 48, "V": 49, "W": 50, "X": 51, "Y": 52, "Z": 53, "[": 54, "a": 55, "b": 56, "c": 57, "d": 58, "e": 59, "f": 60, "g": 61, "h": 62, "i": 63, "j": 64, "k": 65, "l": 66, "m": 67, "n": 68, "o": 69, "p": 70, "q": 71, "r": 72, "s": 73, "t": 74, "u": 75, "v": 76, "w": 77, "x": 78, "y": 79, "z": 80, "«": 81, "°": 82, "»": 83, "½": 84, "Ä": 85, "Ç": 86, "É": 87, "Ö": 88, "Ü": 89, "ß": 90, "à": 91, "â": 92, "ä": 93, "ç": 94, "è": 95, "é": 96, "ê": 97, "ë": 98, "î": 99, "ñ": 100, "ô": 101, "ö": 102, "û": 103, "ü": 104, "ē": 105, "ĕ": 106, "‒": 107, "–": 108, "—": 109, "‘": 110, "’": 111, "‚": 112, "‛": 113, "“": 114, "”": 115, "„": 116, "″": 117, "⁊": 118, "▁": 119, "░": 120, "ꝛ": 121 }, "merges": [] } }