TatonkaHF committed on
Commit
2bc192c
1 Parent(s): 568d9dd

Tokenizer fix

Browse files

Got a "data did not match any variant of untagged enum PyPreTokenizerTypeWrapper at line 90 column 3" error. Found that tokenizer.json caused it, so reinitialized it from the previous model.

Files changed (1) hide show
  1. tokenizer.json +5 -5
tokenizer.json CHANGED
@@ -85,8 +85,8 @@
85
  "pre_tokenizer": {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
- "prepend_scheme": "always",
89
- "split": true
90
  },
91
  "post_processor": {
92
  "type": "TemplateProcessing",
@@ -172,8 +172,8 @@
172
  "decoder": {
173
  "type": "Metaspace",
174
  "replacement": "▁",
175
- "prepend_scheme": "always",
176
- "split": true
177
  },
178
  "model": {
179
  "type": "Unigram",
@@ -184846,4 +184846,4 @@
184846
  ],
184847
  "byte_fallback": false
184848
  }
184849
- }
 
85
  "pre_tokenizer": {
86
  "type": "Metaspace",
87
  "replacement": "▁",
88
+ "add_prefix_space": true,
89
+ "prepend_scheme": "always"
90
  },
91
  "post_processor": {
92
  "type": "TemplateProcessing",
 
172
  "decoder": {
173
  "type": "Metaspace",
174
  "replacement": "▁",
175
+ "add_prefix_space": true,
176
+ "prepend_scheme": "always"
177
  },
178
  "model": {
179
  "type": "Unigram",
 
184846
  ],
184847
  "byte_fallback": false
184848
  }
184849
+ }