juierror committed on
Commit
eceb3fa
1 Parent(s): d56f755

add tokenizer

Browse files
added_tokens.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"<s>": 94, "</s>": 95}
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]", "additional_special_tokens": [{"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}]}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5, "f": 6, "g": 7, "h": 8, "i": 9, "j": 10, "k": 11, "l": 12, "m": 13, "n": 14, "o": 15, "p": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, "̇": 26, "ก": 27, "ข": 28, "ค": 29, "ฆ": 30, "ง": 31, "จ": 32, "ฉ": 33, "ช": 34, "ซ": 35, "ญ": 36, "ฎ": 37, "ฏ": 38, "ฐ": 39, "ฑ": 40, "ฒ": 41, "ณ": 42, "ด": 43, "ต": 44, "ถ": 45, "ท": 46, "ธ": 47, "น": 48, "บ": 49, "ป": 50, "ผ": 51, "ฝ": 52, "พ": 53, "ฟ": 54, "ภ": 55, "ม": 56, "ย": 57, "ร": 58, "ฤ": 59, "ล": 60, "ว": 61, "ศ": 62, "ษ": 63, "ส": 64, "ห": 65, "ฬ": 66, "อ": 67, "ฮ": 68, "ะ": 69, "ั": 70, "า": 71, "ำ": 72, "ิ": 73, "ี": 74, "ึ": 75, "ื": 76, "ุ": 77, "ู": 78, "เ": 79, "แ": 80, "โ": 81, "ใ": 82, "ไ": 83, "ๅ": 84, "ๆ": 85, "็": 86, "่": 87, "้": 88, "๊": 89, "๋": 90, "์": 91, "|": 0, "[UNK]": 92, "[PAD]": 93}