project2you's picture
add tokenizer
c24cb8d
raw
history blame
771 Bytes
{"๋": 0, "่": 1, "ิ": 2, "ง": 3, "ก": 4, "ท": 5, "ศ": 6, "’": 7, "ฤ": 8, "ร": 9, "ซ": 10, "ฒ": 11, "ด": 12, "ธ": 13, "ฏ": 14, "ั": 15, "ณ": 16, "ฟ": 17, "ห": 18, "ฉ": 19, "ำ": 20, "โ": 21, "ภ": 22, "ม": 23, "ฑ": 24, "ถ": 25, "ใ": 26, "ึ": 27, "ะ": 28, "ุ": 29, "เ": 30, "ฝ": 31, "ี": 32, "ํ": 33, "ช": 34, "ว": 35, "ฬ": 36, "ป": 37, "แ": 38, "ฮ": 39, "จ": 40, "ู": 41, "ๅ": 42, "ผ": 43, "ญ": 44, "อ": 45, "๊": 46, "'": 47, "บ": 48, "็": 49, "ื": 50, "ไ": 51, "ฐ": 52, "์": 53, "ๆ": 54, "ฎ": 55, "ล": 56, "า": 57, "ฆ": 59, "ค": 60, "น": 61, "พ": 62, "ษ": 63, "ส": 64, "ย": 65, "ต": 66, "ข": 67, "้": 68, "|": 58, "[UNK]": 69, "[PAD]": 70}