jtatman committed (verified)
Commit ce61eed · 1 Parent(s): 9361c9f

Upload tokenizer

Files changed (2)
  1. tokenizer.json +25 -4
  2. tokenizer_config.json +2 -0
tokenizer.json CHANGED
@@ -275,10 +275,30 @@
     "use_regex": true
   },
   "post_processor": {
-    "type": "ByteLevel",
-    "add_prefix_space": false,
-    "trim_offsets": true,
-    "use_regex": true
+    "type": "TemplateProcessing",
+    "single": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      }
+    ],
+    "pair": [
+      {
+        "Sequence": {
+          "id": "A",
+          "type_id": 0
+        }
+      },
+      {
+        "Sequence": {
+          "id": "B",
+          "type_id": 1
+        }
+      }
+    ],
+    "special_tokens": {}
   },
   "decoder": {
     "type": "ByteLevel",
@@ -294,6 +314,7 @@
     "end_of_word_suffix": null,
     "fuse_unk": false,
     "byte_fallback": false,
+    "ignore_merges": false,
     "vocab": {
       "<|endoftext|>": 0,
       "<|padding|>": 1,
tokenizer_config.json CHANGED
@@ -1,4 +1,6 @@
 {
+  "add_bos_token": false,
+  "add_eos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "0": {