ttop324's picture
add tokenizer
158a838
{"a": 1, "c": 2, "d": 3, "f": 4, "g": 5, "m": 6, "n": 7, "o": 8, "p": 9, "s": 10, "u": 11, "x": 12, "z": 13, "ใ€…": 14, "ใ": 15, "ใ‚": 16, "ใƒ": 17, "ใ„": 18, "ใ…": 19, "ใ†": 20, "ใ‡": 21, "ใˆ": 22, "ใ‰": 23, "ใŠ": 24, "ใ‹": 25, "ใŒ": 26, "ใ": 27, "ใŽ": 28, "ใ": 29, "ใ": 30, "ใ‘": 31, "ใ’": 32, "ใ“": 33, "ใ”": 34, "ใ•": 35, "ใ–": 36, "ใ—": 37, "ใ˜": 38, "ใ™": 39, "ใš": 40, "ใ›": 41, "ใœ": 42, "ใ": 43, "ใž": 44, "ใŸ": 45, "ใ ": 46, "ใก": 47, "ใข": 48, "ใฃ": 49, "ใค": 50, "ใฅ": 51, "ใฆ": 52, "ใง": 53, "ใจ": 54, "ใฉ": 55, "ใช": 56, "ใซ": 57, "ใฌ": 58, "ใญ": 59, "ใฎ": 60, "ใฏ": 61, "ใฐ": 62, "ใฑ": 63, "ใฒ": 64, "ใณ": 65, "ใด": 66, "ใต": 67, "ใถ": 68, "ใท": 69, "ใธ": 70, "ใน": 71, "ใบ": 72, "ใป": 73, "ใผ": 74, "ใฝ": 75, "ใพ": 76, "ใฟ": 77, "ใ‚€": 78, "ใ‚": 79, "ใ‚‚": 80, "ใ‚ƒ": 81, "ใ‚„": 82, "ใ‚…": 83, "ใ‚†": 84, "ใ‚‡": 85, "ใ‚ˆ": 86, "ใ‚‰": 87, "ใ‚Š": 88, "ใ‚‹": 89, "ใ‚Œ": 90, "ใ‚": 91, "ใ‚": 92, "ใ‚’": 93, "ใ‚“": 94, "ใ‚”": 95, "ใ‚–": 96, "ใƒผ": 97, "|": 0, "[UNK]": 98, "[PAD]": 99}