{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "[STOP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "[SPACE]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "BPE", "dropout": null, "unk_token": "[UNK]", "continuing_subword_prefix": null, "end_of_word_suffix": null, "fuse_unk": false, "vocab": { "[STOP]": 0, "[UNK]": 1, "[SPACE]": 2, "!": 3, "(": 4, ")": 5, ",": 6, "-": 7, ".": 8, "/": 9, "0": 10, "1": 11, "2": 12, "3": 13, "4": 14, "5": 15, "6": 16, "7": 17, "8": 18, "9": 19, ":": 20, ";": 21, "?": 22, "A": 23, "B": 24, "C": 25, "D": 26, "E": 27, "F": 28, "G": 29, "H": 30, "I": 31, "J": 32, "K": 33, "L": 34, "M": 35, "N": 36, "O": 37, "P": 38, "Q": 39, "R": 40, "S": 41, "T": 42, "U": 43, "V": 44, "W": 45, "X": 46, "Y": 47, "Z": 48, "a": 49, "b": 50, "c": 51, "d": 52, "e": 53, "f": 54, "g": 55, "h": 56, "i": 57, "j": 58, "k": 59, "l": 60, "m": 61, "n": 62, "o": 63, "p": 64, "q": 65, "r": 66, "s": 67, "t": 68, "u": 69, "v": 70, "w": 71, "x": 72, "y": 73, "z": 74, "«": 75, "»": 76, "À": 77, "Ç": 78, "É": 79, "Ô": 80, "à": 81, "â": 82, "ç": 83, "è": 84, "é": 85, "ê": 86, "ë": 87, "î": 88, "ï": 89, "ô": 90, "ù": 91, "û": 92, "œ": 93, "–": 94, "’": 95, "le": 96, "en": 97, "ai": 98, "es": 99, "ou": 100, "on": 101, "de": 102, "re": 103, "an": 104, "qu": 105, "et": 106, "er": 107, "ent": 108, "il": 109, "la": 110, "ait": 111, "ne": 112, "it": 113, "ar": 114, "au": 115, "ch": 116, "is": 117, "ur": 118, "se": 119, "el": 120, "te": 121, "in": 122, "les": 123, "our": 124, "av": 125, "or": 126, "eu": 127, "ant": 128, "ais": 129, "lu": 130, "me": 131, "que": 132, "pr": 133, "as": 134, "om": 135, "ce": 136, "oi": 137, "elle": 138, "un": 139, "ét": 140, "des": 141, "ous": 142, "ans": 143, "tr": 144, "par": 145, "si": 146, "al": 147, "une": 148, "du": 149, "ri": 150, "est": 151, "qui": 152, "son": 153, "aient": 154, "pas": 155, "tre": 156, "che": 157, "dans": 158, "pour": 159, "sa": 160, "vi": 161, "ment": 162, "ti": 163, "leur": 164, "ré": 165, "con": 166, "out": 167, "ère": 168, "com": 169, "dit": 170, "ois": 171, "ouv": 172, "ge": 173, "avait": 174, "lui": 175, "était": 176, "plu": 177, "ra": 178, "mais": 179, "ain": 180, "eux": 181, "bi": 182, "je": 183, "plus": 184, "ses": 185, "ir": 186, "Il": 187, "dé": 188, "ma": 189, "end": 190, "mon": 191, "ils": 192, "ée": 193, "and": 194, "sur": 195, "li": 196, "ci": 197, "ille": 198, "vous": 199, "vo": 200, "bien": 201, "mp": 202, "ap": 203, "ave": 204, "ser": 205, "oir": 206, "tout": 207, "mi": 208, "su": 209, "don": 210, "rent": 211, "tu": 212, "gr": 213, "ces": 214, "etit": 215, "ier": 216, "ter": 217, "co": 218, "petit": 219, "ab": 220, "ons": 221, "jour": 222, "comme": 223, "ro": 224, "res": 225, "avec": 226, "eau": 227, "és": 228, "ort": 229, "éc": 230, "uis": 231, "Le": 232, "ut": 233, "èrent": 234, "ff": 235, "lle": 236, "oin": 237, "gar": 238, "gn": 239, "enf": 240, "us": 241, "ors": 242, "br": 243, "ès": 244, "ta": 245, "quel": 246, "ette": 247, "cou": 248, "mes": 249, "vie": 250, "di": 251, "gu": 252, "aire": 253, "per": 254 }, "merges": [ "l e", "e n", "a i", "e s", "o u", "o n", "d e", "r e", "a n", "q u", "e t", "e r", "en t", "i l", "l a", "ai t", "n e", "i t", "a r", "a u", "c h", "i s", "u r", "s e", "e l", "t e", "i n", "le s", "ou r", "a v", "o r", "e u", "an t", "ai s", "l u", "m e", "qu e", "p r", "a s", "o m", "c e", "o i", "el le", "u n", "é t", "d es", "ou s", "an s", "t r", "p ar", "s i", "a l", "u ne", "d u", "r i", "es t", "qu i", "s on", "ai ent", "p as", "t re", "ch e", "d ans", "p our", "s a", "v i", "m ent", "t i", "le ur", "r é", "c on", "ou t", "è re", "c om", "d it", "o is", "ou v", "g e", "av ait", "lu i", "ét ait", "p lu", "r a", "m ais", "ai n", "eu x", "b i", "j e", "plu s", "s es", "i r", "I l", "d é", "m a", "en d", "m on", "il s", "é e", "an d", "s ur", "l i", "c i", "il le", "v ous", "v o", "bi en", "m p", "a p", "av e", "s er", "oi r", "t out", "m i", "s u", "d on", "r ent", "t u", "g r", "c es", "et it", "i er", "t er", "c o", "p etit", "a b", "on s", "j our", "com me", "r o", "r es", "ave c", "e au", "é s", "or t", "é c", "u is", "L e", "u t", "è rent", "f f", "l le", "o in", "g ar", "g n", "en f", "u s", "or s", "b r", "è s", "t a", "qu el", "et te", "c ou", "m es", "vi e", "d i", "g u", "ai re", "p er" ] } }