Rebecca Iglesias-Flores committed on
Commit
25fe238
1 Parent(s): 5a72a0d

Upload tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +2 -2
  2. tokenizer_config.json +2 -2
  3. vocab.json +2 -3
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
- "pad_token": "[PAD]",
5
- "unk_token": "[UNK]"
6
  }
 
1
  {
2
  "bos_token": "<s>",
3
  "eos_token": "</s>",
4
+ "pad_token": "<pad>",
5
+ "unk_token": "<unk>"
6
  }
tokenizer_config.json CHANGED
@@ -4,10 +4,10 @@
4
  "do_lower_case": false,
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
7
- "pad_token": "[PAD]",
8
  "replace_word_delimiter_char": " ",
9
  "target_lang": null,
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
- "unk_token": "[UNK]",
12
  "word_delimiter_token": "|"
13
  }
 
4
  "do_lower_case": false,
5
  "eos_token": "</s>",
6
  "model_max_length": 1000000000000000019884624838656,
7
+ "pad_token": "<pad>",
8
  "replace_word_delimiter_char": " ",
9
  "target_lang": null,
10
  "tokenizer_class": "Wav2Vec2CTCTokenizer",
11
+ "unk_token": "<unk>",
12
  "word_delimiter_token": "|"
13
  }
vocab.json CHANGED
@@ -1,7 +1,6 @@
1
  {
2
- "'": 14,
3
- "[PAD]": 29,
4
- "[UNK]": 28,
5
  "a": 1,
6
  "b": 2,
7
  "c": 3,
 
1
  {
2
+ "<pad>": 28,
3
+ "<unk>": 27,
 
4
  "a": 1,
5
  "b": 2,
6
  "c": 3,