asahi417 committed on
Commit
8ffe27d
1 Parent(s): 4ed0089

add tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,3 +1 @@
1
- {
2
- "[MASK]": 128000
3
- }
1
+ {"[MASK]": 128000}
 
 
special_tokens_map.json CHANGED
@@ -1,9 +1 @@
1
- {
2
- "bos_token": "[CLS]",
3
- "cls_token": "[CLS]",
4
- "eos_token": "[SEP]",
5
- "mask_token": "[MASK]",
6
- "pad_token": "[PAD]",
7
- "sep_token": "[SEP]",
8
- "unk_token": "[UNK]"
9
- }
1
+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]"}
 
 
 
 
 
 
 
 
tokenizer_config.json CHANGED
@@ -1,16 +1 @@
1
- {
2
- "bos_token": "[CLS]",
3
- "cls_token": "[CLS]",
4
- "do_lower_case": false,
5
- "eos_token": "[SEP]",
6
- "mask_token": "[MASK]",
7
- "name_or_path": "deberta-v3-large-conll2003",
8
- "pad_token": "[PAD]",
9
- "sep_token": "[SEP]",
10
- "sp_model_kwargs": {},
11
- "special_tokens_map_file": null,
12
- "split_by_punct": false,
13
- "tokenizer_class": "DebertaV2Tokenizer",
14
- "unk_token": "[UNK]",
15
- "vocab_type": "spm"
16
- }
1
+ {"do_lower_case": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "[UNK]", "sep_token": "[SEP]", "pad_token": "[PAD]", "cls_token": "[CLS]", "mask_token": "[MASK]", "split_by_punct": false, "sp_model_kwargs": {}, "name_or_path": "deberta-v3-large-conll2003", "special_tokens_map_file": null, "vocab_type": "spm", "tokenizer_file": "deberta-v3-large-conll2003/tokenizer.json", "tokenizer_class": "DebertaV2Tokenizer"}