arxyzan committed on
Commit
71e5e47
1 Parent(s): 6659637

add tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +4 -2
tokenizer.json CHANGED
@@ -53,7 +53,8 @@
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
56
- "trim_offsets": true
 
57
  },
58
  "post_processor": {
59
  "type": "RobertaProcessing",
@@ -71,7 +72,8 @@
71
  "decoder": {
72
  "type": "ByteLevel",
73
  "add_prefix_space": true,
74
- "trim_offsets": true
 
75
  },
76
  "model": {
77
  "type": "BPE",
53
  "pre_tokenizer": {
54
  "type": "ByteLevel",
55
  "add_prefix_space": false,
56
+ "trim_offsets": true,
57
+ "use_regex": true
58
  },
59
  "post_processor": {
60
  "type": "RobertaProcessing",
72
  "decoder": {
73
  "type": "ByteLevel",
74
  "add_prefix_space": true,
75
+ "trim_offsets": true,
76
+ "use_regex": true
77
  },
78
  "model": {
79
  "type": "BPE",