huyue012 committed on
Commit
6f362a7
1 Parent(s): 828f9ca

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
1
+ {"[": 0, "i": 1, "u": 2, "l": 3, "5": 4, "w": 5, "&": 6, "q": 7, "8": 8, "p": 9, "b": 10, "x": 11, "$": 12, "z": 13, "+": 15, "h": 16, "3": 17, "7": 18, "r": 19, "a": 20, "g": 21, "'": 22, "c": 23, "2": 24, "j": 25, "m": 26, "e": 27, "0": 28, "t": 29, "o": 30, "1": 31, "]": 32, "v": 33, ">": 34, "4": 35, "k": 36, "f": 37, "n": 38, "6": 39, "s": 40, "_": 41, "<": 42, "9": 43, "y": 44, "d": 45, "|": 14, "<unk>": 46, "<pad>": 47}