helboukkouri committed on
Commit
e23629c
1 Parent(s): f87969e

Update tokenizer_config.json

Browse files

- formatted the JSON
- added character_id key/value pairs for 3 special characters (bow/eow/pad)

Files changed (1) hide show
  1. tokenizer_config.json +21 -1
tokenizer_config.json CHANGED
@@ -1 +1,21 @@
1
- {"name_or_path": "helboukkouri/character-bert", "tokenizer_class": "CharacterBertTokenizer", "max_word_length": 50, "do_lower_case": true, "do_basic_tokenize": true, "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "auto_map": {"AutoTokenizer": ["tokenization_character_bert.CharacterBertTokenizer", null]}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name_or_path": "helboukkouri/character-bert",
3
+ "tokenizer_class": "CharacterBertTokenizer",
4
+ "max_word_length": 50,
5
+ "do_lower_case": true,
6
+ "do_basic_tokenize": true,
7
+ "bow_character_id": 258,
8
+ "eow_character_id": 259,
9
+ "pad_character_id": 260,
10
+ "unk_token": "[UNK]",
11
+ "sep_token": "[SEP]",
12
+ "pad_token": "[PAD]",
13
+ "cls_token": "[CLS]",
14
+ "mask_token": "[MASK]",
15
+ "auto_map": {
16
+ "AutoTokenizer": [
17
+ "tokenization_character_bert.CharacterBertTokenizer",
18
+ null
19
+ ]
20
+ }
21
+ }