ttop324 committed on
Commit
f15a0e1
•
1 Parent(s): 389ddec

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"a": 1, "c": 2, "d": 3, "f": 4, "g": 5, "m": 6, "n": 7, "o": 8, "p": 9, "s": 10, "u": 11, "x": 12, "z": 13, "々": 14, "ぁ": 15, "あ": 16, "ぃ": 17, "い": 18, "ぅ": 19, "う": 20, "ぇ": 21, "え": 22, "ぉ": 23, "お": 24, "か": 25, "が": 26, "き": 27, "ぎ": 28, "く": 29, "ぐ": 30, "け": 31, "げ": 32, "こ": 33, "ご": 34, "さ": 35, "ざ": 36, "し": 37, "じ": 38, "す": 39, "ず": 40, "せ": 41, "ぜ": 42, "そ": 43, "ぞ": 44, "た": 45, "だ": 46, "ち": 47, "ぢ": 48, "っ": 49, "つ": 50, "づ": 51, "て": 52, "で": 53, "と": 54, "ど": 55, "な": 56, "に": 57, "ぬ": 58, "ね": 59, "の": 60, "は": 61, "ば": 62, "ぱ": 63, "ひ": 64, "び": 65, "ぴ": 66, "ふ": 67, "ぶ": 68, "ぷ": 69, "へ": 70, "べ": 71, "ぺ": 72, "ほ": 73, "ぼ": 74, "ぽ": 75, "ま": 76, "み": 77, "む": 78, "め": 79, "も": 80, "ゃ": 81, "や": 82, "ゅ": 83, "ゆ": 84, "ょ": 85, "よ": 86, "ら": 87, "り": 88, "る": 89, "れ": 90, "ろ": 91, "わ": 92, "を": 93, "ん": 94, "ゔ": 95, "ゖ": 96, "ー": 97, "ๅ‰": 98, "禕": 99, "繫": 100, "้‰": 101, "|": 0, "[UNK]": 102, "[PAD]": 103}