thocheat's picture
add tokenizer
8cf8291
raw
history blame contribute delete
947 Bytes
{"ậ": 0, "k": 1, "ả": 2, "p": 3, "ị": 4, "ễ": 5, "v": 7, "á": 8, "ỗ": 9, "ô": 10, "ă": 11, "ò": 12, "ở": 13, "o": 14, "ẩ": 15, "t": 16, "ơ": 17, "ỳ": 18, "ứ": 19, "ý": 20, "ỉ": 21, "ệ": 22, "ổ": 23, "q": 24, "ã": 25, "ầ": 26, "ũ": 27, "ờ": 28, "d": 29, "l": 30, "ù": 31, "ẹ": 32, "ạ": 33, "ĩ": 34, "g": 35, "r": 36, "è": 37, "ọ": 38, "ê": 39, "ợ": 40, "ụ": 41, "ể": 42, "ó": 43, "s": 44, "ỵ": 45, "ộ": 46, "ì": 47, "h": 48, "ằ": 49, "ú": 50, "ẵ": 51, "ữ": 52, "õ": 53, "ẳ": 54, "ố": 55, "m": 56, "ế": 57, "ề": 58, "ẫ": 59, "ự": 60, "đ": 61, "í": 62, "ớ": 63, "e": 64, "a": 65, "ỷ": 66, "ủ": 67, "ắ": 68, "ỹ": 69, "ồ": 70, "x": 71, "ỏ": 72, "ử": 73, "u": 74, "ư": 75, "ỡ": 76, "n": 77, "ẽ": 78, "à": 79, "y": 80, "b": 81, "é": 82, "c": 83, "i": 84, "ừ": 85, "â": 86, "4": 87, "ẻ": 88, "ấ": 89, "ặ": 90, "|": 6, "[UNK]": 91, "[PAD]": 92}