jtatman committed on
Commit
5eca46e
·
verified ·
1 Parent(s): bf58674

Upload tokenizer

Browse files
Files changed (2) hide show
  1. tokenizer.json +4 -25
  2. tokenizer_config.json +0 -2
tokenizer.json CHANGED
@@ -275,30 +275,10 @@
275
  "use_regex": true
276
  },
277
  "post_processor": {
278
- "type": "TemplateProcessing",
279
- "single": [
280
- {
281
- "Sequence": {
282
- "id": "A",
283
- "type_id": 0
284
- }
285
- }
286
- ],
287
- "pair": [
288
- {
289
- "Sequence": {
290
- "id": "A",
291
- "type_id": 0
292
- }
293
- },
294
- {
295
- "Sequence": {
296
- "id": "B",
297
- "type_id": 1
298
- }
299
- }
300
- ],
301
- "special_tokens": {}
302
  },
303
  "decoder": {
304
  "type": "ByteLevel",
@@ -314,7 +294,6 @@
314
  "end_of_word_suffix": null,
315
  "fuse_unk": false,
316
  "byte_fallback": false,
317
- "ignore_merges": false,
318
  "vocab": {
319
  "<|endoftext|>": 0,
320
  "<|padding|>": 1,
 
275
  "use_regex": true
276
  },
277
  "post_processor": {
278
+ "type": "ByteLevel",
279
+ "add_prefix_space": false,
280
+ "trim_offsets": true,
281
+ "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
282
  },
283
  "decoder": {
284
  "type": "ByteLevel",
 
294
  "end_of_word_suffix": null,
295
  "fuse_unk": false,
296
  "byte_fallback": false,
 
297
  "vocab": {
298
  "<|endoftext|>": 0,
299
  "<|padding|>": 1,
tokenizer_config.json CHANGED
@@ -1,6 +1,4 @@
1
  {
2
- "add_bos_token": false,
3
- "add_eos_token": false,
4
  "add_prefix_space": false,
5
  "added_tokens_decoder": {
6
  "0": {
 
1
  {
 
 
2
  "add_prefix_space": false,
3
  "added_tokens_decoder": {
4
  "0": {