asahi417 committed on
Commit
0675cd3
1 Parent(s): 03393f8

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -15
  2. tokenizer.json +2 -4
  3. tokenizer_config.json +1 -16
special_tokens_map.json CHANGED
@@ -1,15 +1 @@
1
- {
2
- "bos_token": "<s>",
3
- "cls_token": "<s>",
4
- "eos_token": "</s>",
5
- "mask_token": {
6
- "content": "<mask>",
7
- "lstrip": true,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false
11
- },
12
- "pad_token": "<pad>",
13
- "sep_token": "</s>",
14
- "unk_token": "<unk>"
15
- }
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "sep_token": "</s>", "pad_token": "<pad>", "cls_token": "<s>", "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false}}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
tokenizer.json CHANGED
@@ -67,8 +67,7 @@
67
  "pre_tokenizer": {
68
  "type": "ByteLevel",
69
  "add_prefix_space": false,
70
- "trim_offsets": true,
71
- "use_regex": true
72
  },
73
  "post_processor": {
74
  "type": "RobertaProcessing",
@@ -86,8 +85,7 @@
86
  "decoder": {
87
  "type": "ByteLevel",
88
  "add_prefix_space": true,
89
- "trim_offsets": true,
90
- "use_regex": true
91
  },
92
  "model": {
93
  "type": "BPE",
 
67
  "pre_tokenizer": {
68
  "type": "ByteLevel",
69
  "add_prefix_space": false,
70
+ "trim_offsets": true
 
71
  },
72
  "post_processor": {
73
  "type": "RobertaProcessing",
 
85
  "decoder": {
86
  "type": "ByteLevel",
87
  "add_prefix_space": true,
88
+ "trim_offsets": true
 
89
  },
90
  "model": {
91
  "type": "BPE",
tokenizer_config.json CHANGED
@@ -1,16 +1 @@
1
- {
2
- "add_prefix_space": false,
3
- "bos_token": "<s>",
4
- "cls_token": "<s>",
5
- "eos_token": "</s>",
6
- "errors": "replace",
7
- "mask_token": "<mask>",
8
- "model_max_length": 512,
9
- "name_or_path": "roberta-base",
10
- "pad_token": "<pad>",
11
- "sep_token": "</s>",
12
- "special_tokens_map_file": null,
13
- "tokenizer_class": "RobertaTokenizer",
14
- "trim_offsets": true,
15
- "unk_token": "<unk>"
16
- }
 
1
+ {"unk_token": "<unk>", "bos_token": "<s>", "eos_token": "</s>", "add_prefix_space": false, "errors": "replace", "sep_token": "</s>", "cls_token": "<s>", "pad_token": "<pad>", "mask_token": "<mask>", "trim_offsets": true, "model_max_length": 512, "name_or_path": "relbert-roberta-base-semeval2012-v6-average-prompt-d-loob-0", "special_tokens_map_file": null, "tokenizer_class": "RobertaTokenizer"}