NX2411 committed on
Commit
320bc76
·
1 Parent(s): 37c1b9d

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -6
  2. tokenizer_config.json +1 -10
  3. vocab.json +1 -32
special_tokens_map.json CHANGED
@@ -1,6 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
- }
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,10 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "do_lower_case": false,
4
- "eos_token": "</s>",
5
- "pad_token": "[PAD]",
6
- "replace_word_delimiter_char": " ",
7
- "tokenizer_class": "Wav2Vec2CTCTokenizer",
8
- "unk_token": "[UNK]",
9
- "word_delimiter_token": "|"
10
- }
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
 
 
 
 
 
 
 
 
 
vocab.json CHANGED
@@ -1,32 +1 @@
1
- {
2
- "'": 15,
3
- "[PAD]": 29,
4
- "[UNK]": 28,
5
- "a": 0,
6
- "b": 11,
7
- "c": 1,
8
- "d": 6,
9
- "e": 20,
10
- "f": 14,
11
- "g": 4,
12
- "h": 13,
13
- "i": 5,
14
- "j": 22,
15
- "k": 27,
16
- "l": 8,
17
- "m": 24,
18
- "n": 25,
19
- "o": 7,
20
- "p": 17,
21
- "q": 19,
22
- "r": 9,
23
- "s": 21,
24
- "t": 18,
25
- "u": 2,
26
- "v": 16,
27
- "w": 26,
28
- "x": 10,
29
- "y": 3,
30
- "z": 23,
31
- "|": 12
32
- }
 
1
+ {"'": 1, "a": 2, "b": 3, "c": 4, "d": 5, "e": 6, "f": 7, "g": 8, "h": 9, "i": 10, "j": 11, "k": 12, "l": 13, "m": 14, "n": 15, "o": 16, "p": 17, "q": 18, "r": 19, "s": 20, "t": 21, "u": 22, "v": 23, "w": 24, "x": 25, "y": 26, "z": 27, "|": 0, "[UNK]": 28, "[PAD]": 29}