{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "UNK", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "PAD", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 154, "content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true } ], "normalizer": { "type": "Sequence", "normalizers": [ { "type": "Replace", "pattern": { "String": "\n" }, "content": " UTT_BOUNDARY" }, { "type": "Strip", "strip_left": true, "strip_right": true } ] }, "pre_tokenizer": { "type": "Whitespace" }, "post_processor": null, "decoder": null, "model": { "type": "WordLevel", "vocab": { "UNK": 0, "PAD": 1, "BOS": 2, "EOS": 3, "WORD_BOUNDARY": 4, "UTT_BOUNDARY": 5, "aː˧": 6, "t": 7, "ɐ˥": 8, "k": 9, "l": 10, "j": 11, "ʊ˥": 12, "aː˧˩̰": 13, "ɛː˥": 14, "n": 15, "ei˩˧": 16, "w": 17, "aː˨": 18, "ɐi˧˥": 19, "m̩˧˥": 20, "m": 21, "ou˥": 22, "aː˧˥": 23, "ei˥": 24, "iː˧": 25, "ts": 26, "ɔː˧˥": 27, "tʰ": 28, "iː˥": 29, "f": 30, "aːĭ˧": 31, "ɐ˨": 32, "p": 33, "h": 34, "ɵy˧": 35, "aː˥": 36, "ou˨": 37, "ɔː˧": 38, "ɐi˧˩̰": 39, "uː˧": 40, "ŋ": 41, "s": 42, "ɔːĭ˥": 43, "ɐu˨": 44, "iː˨": 45, "ei˧˥": 46, "ɐi˨": 47, "ʊ˧˩̰": 48, "ʊ˨": 49, "aː˩˧": 50, "aːĭ˧˥": 51, "ɔː˨": 52, "ɛː˩˧": 53, "ɪ˨": 54, "iːŭ˧": 55, "ɛː˧˩̰": 56, "ɪ˧˥": 57, "̩˧˩̰": 58, "ɵ˧˥": 59, "ei˧": 60, "ɐu˧˩̰": 61, "m̩˧": 62, "ɐu˧˥": 63, "ɐu˩˧": 64, "ɐi˥": 65, "ɔː˥": 66, "ɔːĭ˧": 67, "ou˧˥": 68, "ou˩˧": 69, "ɐ˧": 70, "tsʰ": 71, "ɛː˧˥": 72, "iː˧˥": 73, "ɔː˩˧": 74, "kʰ": 75, "ɐ˧˩̰": 76, "aːŭ˧˥": 77, "pʰ": 78, "aːĭ˧˩̰": 79, "ɵy˩˧": 80, "ɵ˧": 81, "ɛː˧": 82, "ei˧˩̰": 83, "uː˧˥": 84, "ɔː˧˩̰": 85, "ɛː˨": 86, "uː˥": 87, "ʊ˧": 88, "iː˧˩̰": 89, "yː˨": 90, "aːŭ˧": 91, "œː˩˧": 92, "ɐ˧˥": 93, "iː˩˧": 94, "ɪ˧˩̰": 95, "iːŭ˧˩̰": 96, "œː˧˥": 97, "yː˧": 98, "uːĭ˩˧": 99, "ɵy˧˥": 100, "yː˧˩̰": 101, "ɔːĭ˧˥": 102, "ɛː": 103, "u˨": 104, "ou˧": 105, "ei˨": 106, "ɐu˥": 107, "ɵ˥": 108, "uː˧˩̰": 109, "yː˥": 110, "ɪ˥": 111, "œː˥": 112, "œː˧˩̰": 113, "aːĭ˨": 114, "ɐ˩˧": 115, "œː˧": 116, "uːĭ˧˥": 117, "ɐu˧": 118, "ɐi˧": 119, "ou˧˩̰": 120, "aːĭ˥": 121, "aːŭ˥": 122, "yː˧˥": 123, "iːŭ˥": 124, "ɔːĭ˨": 125, "ʊ˧˥": 126, "m̩˥": 127, "iːŭ˧˥": 128, "ɐi˩˧": 129, "ɵy˥": 130, "uːĭ˧": 131, "ɵy˧˩̰": 132, "uːĭ˥": 133, "aːŭ˧˩̰": 134, "yː˩˧": 135, "ɔːĭ˧˩̰": 136, "aːŭ˩˧": 137, "aːĭ˩˧": 138, "uːĭ˨": 139, "œː˨": 140, "uː˨": 141, "ɵy˨": 142, "aːŭ˨": 143, "m̩˩˧": 144, "ŋ˩˧": 145, "ɪ˧": 146, "m̩˨": 147, "iːŭ˩˧": 148, "iːŭ˨": 149, "ɵ˨": 150, "uːĭ˧˩̰": 151, "uː˩˧": 152, "ɵ˧˩̰": 153 }, "unk_token": "UNK" } }