LanPham committed on
Commit
8666f04
1 Parent(s): bae0365

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"h": 0, "í": 1, "ậ": 2, "s": 3, "ạ": 4, "ẻ": 5, "ỉ": 6, "t": 7, "k": 8, "ò": 9, "ộ": 10, "ă": 11, "ễ": 12, "ì": 13, "ầ": 14, "u": 15, "ỡ": 16, "ự": 17, "ỗ": 18, "m": 19, "e": 20, "ẫ": 21, "ớ": 22, "ặ": 23, "ợ": 24, "ả": 25, "ủ": 26, "ố": 27, "ẵ": 28, "ắ": 29, "ô": 30, "n": 31, "ý": 32, "ữ": 34, "ẽ": 35, "đ": 36, "o": 37, "ũ": 38, "c": 39, "à": 40, "õ": 41, "ử": 42, "x": 43, "ỹ": 44, "v": 45, "ừ": 46, "ở": 47, "ư": 48, "ỏ": 49, "ệ": 50, "ằ": 51, "ơ": 52, "é": 53, "d": 54, "ế": 55, "r": 56, "è": 57, "ú": 58, "ĩ": 59, "ứ": 60, "l": 61, "ù": 62, "ấ": 63, "ổ": 64, "ờ": 65, "ọ": 66, "ụ": 67, "â": 68, "ồ": 69, "ã": 70, "ể": 71, "p": 72, "ị": 73, "ề": 74, "i": 75, "y": 76, "ê": 77, "q": 78, "b": 79, "a": 80, "g": 81, "ỷ": 82, "ỳ": 83, "ó": 84, "ẹ": 85, "á": 86, "ẩ": 87, "|": 33, "[UNK]": 88, "[PAD]": 89}