add tokenizer

Files changed:
- special_tokens_map.json (+1, -0)
- tokenizer_config.json (+1, -0)
- vocab.json (+1, -0)
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
+{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
+{"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "<pad>", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json
ADDED
@@ -0,0 +1 @@
+{"[": 0, "i": 1, "u": 2, "l": 3, "5": 4, "w": 5, "&": 6, "q": 7, "8": 8, "p": 9, "b": 10, "x": 11, "$": 12, "z": 13, "+": 15, "h": 16, "3": 17, "7": 18, "r": 19, "a": 20, "g": 21, "'": 22, "c": 23, "2": 24, "j": 25, "m": 26, "e": 27, "0": 28, "t": 29, "o": 30, "1": 31, "]": 32, "v": 33, ">": 34, "4": 35, "k": 36, "f": 37, "n": 38, "6": 39, "s": 40, "_": 41, "<": 42, "9": 43, "y": 44, "d": 45, "|": 14, "<unk>": 46, "<pad>": 47}