Lasion committed
Commit 16299a1
Parent: 4130ee1

Upload tokenizer

added_tokens.json CHANGED
@@ -1,6 +1,4 @@
 {
   "</s>": 111,
-  "<s>": 110,
-  "[PAD]": 113,
-  "[UNK]": 112
+  "<s>": 110
 }
special_tokens_map.json CHANGED
@@ -1,6 +1,6 @@
 {
   "bos_token": "<s>",
   "eos_token": "</s>",
-  "pad_token": "[PAD]",
-  "unk_token": "[UNK]"
+  "pad_token": "<pad>",
+  "unk_token": "<unk>"
 }
tokenizer_config.json CHANGED
@@ -31,22 +31,6 @@
       "rstrip": false,
       "single_word": false,
       "special": true
-    },
-    "112": {
-      "content": "[UNK]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
-    },
-    "113": {
-      "content": "[PAD]",
-      "lstrip": false,
-      "normalized": false,
-      "rstrip": false,
-      "single_word": false,
-      "special": true
     }
   },
   "bos_token": "<s>",
@@ -54,10 +38,10 @@
   "do_lower_case": false,
   "eos_token": "</s>",
   "model_max_length": 1000000000000000019884624838656,
-  "pad_token": "[PAD]",
+  "pad_token": "<pad>",
   "replace_word_delimiter_char": " ",
   "target_lang": null,
   "tokenizer_class": "Wav2Vec2CTCTokenizer",
-  "unk_token": "[UNK]",
+  "unk_token": "<unk>",
   "word_delimiter_token": "|"
 }
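
After this commit, the tokenizer's padding and unknown tokens are "<pad>" and "<unk>" rather than "[PAD]" and "[UNK]", and the "[PAD]"/"[UNK]" entries are dropped from added_tokens.json. A minimal sketch to sanity-check the uploaded files, assuming a hypothetical repo id "Lasion/model" (substitute the actual repository):

    from transformers import Wav2Vec2CTCTokenizer

    # "Lasion/model" is a placeholder repo id; substitute the real repository.
    tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("Lasion/model")

    # Expect "<pad>" and "<unk>" after this commit, not "[PAD]" / "[UNK]".
    print(tokenizer.pad_token, tokenizer.unk_token)

    # bos/eos are unchanged by this commit: "<s>" and "</s>".
    print(tokenizer.bos_token, tokenizer.eos_token)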