LanPham's picture
add tokenizer
8666f04
raw history blame
No virus
916 Bytes
{"h": 0, "í": 1, "ậ": 2, "s": 3, "ạ": 4, "ẻ": 5, "ỉ": 6, "t": 7, "k": 8, "ò": 9, "ộ": 10, "ă": 11, "ễ": 12, "ì": 13, "ầ": 14, "u": 15, "ỡ": 16, "ự": 17, "ỗ": 18, "m": 19, "e": 20, "ẫ": 21, "ớ": 22, "ặ": 23, "ợ": 24, "ả": 25, "ủ": 26, "ố": 27, "ẵ": 28, "ắ": 29, "ô": 30, "n": 31, "ý": 32, "ữ": 34, "ẽ": 35, "đ": 36, "o": 37, "ũ": 38, "c": 39, "à": 40, "õ": 41, "ử": 42, "x": 43, "ỹ": 44, "v": 45, "ừ": 46, "ở": 47, "ư": 48, "ỏ": 49, "ệ": 50, "ằ": 51, "ơ": 52, "é": 53, "d": 54, "ế": 55, "r": 56, "è": 57, "ú": 58, "ĩ": 59, "ứ": 60, "l": 61, "ù": 62, "ấ": 63, "ổ": 64, "ờ": 65, "ọ": 66, "ụ": 67, "â": 68, "ồ": 69, "ã": 70, "ể": 71, "p": 72, "ị": 73, "ề": 74, "i": 75, "y": 76, "ê": 77, "q": 78, "b": 79, "a": 80, "g": 81, "ỷ": 82, "ỳ": 83, "ó": 84, "ẹ": 85, "á": 86, "ẩ": 87, "|": 33, "[UNK]": 88, "[PAD]": 89}