add tokenizer
Browse files- vocab.json +1 -1
vocab.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"
|
|
|
1 |
+
{"P": 0, "Í": 1, "Ị": 2, "Ê": 3, "Ớ": 4, "Ự": 5, "Ỏ": 6, "Ể": 7, "Ì": 8, "Ạ": 9, "Ộ": 10, "Ý": 11, "È": 12, "Ờ": 13, "Ừ": 14, "L": 15, "Ỹ": 16, "K": 17, "O": 18, "Ư": 19, "Ệ": 20, "Ỡ": 21, "H": 22, "M": 23, "Ẽ": 24, "Ậ": 25, "Ỵ": 26, "Â": 27, "Ã": 28, ":": 29, "Ữ": 30, "B": 31, "Ù": 32, "Ầ": 33, "Ề": 34, "Ỗ": 35, "D": 36, "A": 37, "Ẹ": 38, "C": 39, "Ỷ": 40, "Ỳ": 41, "Q": 42, "Ĩ": 43, "Ă": 44, "Ỉ": 45, "É": 46, "Ố": 47, "Ứ": 48, "Ử": 49, "I": 50, "Ó": 51, "Ẳ": 52, "E": 53, "V": 54, "Ò": 55, "N": 56, "Ắ": 57, "R": 58, "Y": 59, "Ồ": 60, "Ẫ": 61, "U": 62, "T": 63, "Ễ": 64, "Ủ": 65, "Ú": 66, "Ẻ": 67, "À": 68, "X": 69, "Ụ": 70, "Ả": 71, "Ẵ": 72, "Ằ": 73, "Ơ": 75, "Ặ": 76, "G": 77, "4": 78, "Á": 79, "Ế": 80, "Ấ": 81, "Ọ": 82, "S": 83, "Ổ": 84, "Õ": 85, "Ô": 86, "Ũ": 87, "Đ": 88, "Ở": 89, "Ẩ": 90, "Ợ": 91, "|": 74, "[UNK]": 92, "[PAD]": 93}
|