thocheat committed on
Commit
e779fba
1 Parent(s): 4f23731

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"": 0, "t": 1, "é": 2, "": 3, "s": 4, "x": 5, "â": 6, "b": 7, "ê": 8, "": 9, "": 10, "m": 11, "ă": 12, "": 13, "v": 14, "á": 15, "": 16, "ò": 17, "ì": 18, "": 19, "y": 20, "d": 21, "": 22, "í": 23, "": 24, "": 25, "o": 26, "u": 27, "": 28, "": 29, "": 30, "": 31, "": 32, "": 33, "": 34, "": 35, "": 36, "à": 37, "": 38, "ó": 39, "": 40, "e": 41, "ù": 42, "p": 43, "": 44, "": 45, "ế": 46, "a": 47, "": 48, "l": 49, "": 50, "": 51, "q": 52, "": 53, "c": 54, "": 55, "": 56, "": 57, "ô": 58, "": 59, "ư": 60, "n": 61, "": 62, "": 63, "đ": 64, "ũ": 65, "": 66, "ý": 67, "k": 68, "r": 69, "": 70, "g": 71, "ú": 73, "h": 74, "": 75, "4": 76, "": 77, "": 78, "": 79, "õ": 80, "": 81, "": 82, "ĩ": 83, "ơ": 84, "": 85, "ã": 86, "": 87, "i": 88, "": 89, "è": 90, "|": 72, "[UNK]": 91, "[PAD]": 92}
 
1
+ {"P": 0, "Í": 1, "": 2, "Ê": 3, "": 4, "": 5, "": 6, "": 7, "Ì": 8, "": 9, "": 10, "Ý": 11, "È": 12, "": 13, "": 14, "L": 15, "": 16, "K": 17, "O": 18, "Ư": 19, "": 20, "": 21, "H": 22, "M": 23, "": 24, "": 25, "": 26, "Â": 27, "Ã": 28, ":": 29, "": 30, "B": 31, "Ù": 32, "": 33, "": 34, "": 35, "D": 36, "A": 37, "": 38, "C": 39, "": 40, "": 41, "Q": 42, "Ĩ": 43, "Ă": 44, "": 45, "É": 46, "": 47, "": 48, "": 49, "I": 50, "Ó": 51, "": 52, "E": 53, "V": 54, "Ò": 55, "N": 56, "": 57, "R": 58, "Y": 59, "": 60, "": 61, "U": 62, "T": 63, "": 64, "": 65, "Ú": 66, "": 67, "À": 68, "X": 69, "": 70, "": 71, "": 72, "": 73, "Ơ": 75, "": 76, "G": 77, "4": 78, "Á": 79, "": 80, "": 81, "": 82, "S": 83, "": 84, "Õ": 85, "Ô": 86, "Ũ": 87, "Đ": 88, "": 89, "": 90, "Ợ": 91, "|": 74, "[UNK]": 92, "[PAD]": 93}