phongdtd commited on
Commit
b344cac
·
1 Parent(s): 0fc29d1

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"": 0, "": 1, "": 2, "": 3, "ĩ": 4, "": 5, "": 6, "m": 7, "o": 8, "": 9, "": 10, "": 11, "": 12, "y": 13, "ũ": 14, "": 15, "": 16, "s": 17, "": 18, "": 19, "": 20, "": 21, "ô": 22, "": 23, "đ": 24, "ù": 25, "b": 26, "": 27, "": 28, "l": 29, "t": 30, "j": 31, "ă": 32, "": 33, "": 34, "f": 35, "": 36, "ó": 37, "": 38, "": 39, "h": 40, "k": 41, "e": 42, "c": 43, "": 45, "v": 46, "": 47, "": 48, "": 49, "p": 50, "ơ": 51, "": 52, "z": 53, "é": 54, "": 55, "": 56, "ế": 57, "ú": 58, "": 59, "r": 60, "ý": 61, "g": 62, "õ": 63, "q": 64, "n": 65, "": 66, "í": 67, "": 68, "ã": 69, "u": 70, "è": 71, "": 72, "ì": 73, "a": 74, "w": 75, "d": 76, "ò": 77, "": 78, "": 79, "": 80, "i": 81, "ư": 82, "": 83, "á": 84, "â": 85, "à": 86, "x": 87, "": 88, "": 89, "": 90, "": 91, "": 92, "ê": 93, "|": 44, "[UNK]": 94, "[PAD]": 95}
 
1
+ {"e": 0, "à": 1, "d": 2, "j": 3, "k": 4, "": 5, "": 6, "ế": 7, "ê": 8, "q": 9, "ý": 10, "": 11, "b": 12, "": 13, "": 14, "í": 15, "ò": 16, "ó": 17, "": 18, "ú": 19, "l": 20, "": 21, "": 22, "": 23, "è": 24, "ì": 26, "a": 27, "n": 28, "ù": 29, "": 30, "": 31, "â": 32, "h": 33, "": 34, "": 35, "i": 36, "": 37, "w": 38, "ă": 39, "c": 40, "": 41, "": 42, "g": 43, "": 44, "ẵ": 45, "r": 46, "": 47, "": 48, "ư": 49, "á": 50, "": 51, "": 52, "": 53, "y": 54, "": 55, "ũ": 56, "đ": 57, "": 58, "": 59, "": 60, "ĩ": 61, "ô": 62, "é": 63, "m": 64, "u": 65, "x": 66, "": 67, "": 68, "": 69, "s": 70, "ã": 71, "": 72, "": 73, "": 74, "": 75, "t": 76, "ơ": 77, "v": 78, "": 79, "o": 80, "z": 81, "": 82, "": 83, "f": 84, "": 85, "": 86, "": 87, "õ": 88, "": 89, "p": 90, "": 91, "": 92, "": 93, "|": 25, "[UNK]": 94, "[PAD]": 95}