ehartford committed on
Commit
13e4530
1 Parent(s): 9dbf330

Upload tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +63 -4
tokenizer.json CHANGED
@@ -2347,10 +2347,69 @@
2347
  ]
2348
  },
2349
  "post_processor": {
2350
- "type": "ByteLevel",
2351
- "add_prefix_space": true,
2352
- "trim_offsets": false,
2353
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2354
  },
2355
  "decoder": {
2356
  "type": "ByteLevel",
 
2347
  ]
2348
  },
2349
  "post_processor": {
2350
+ "type": "Sequence",
2351
+ "processors": [
2352
+ {
2353
+ "type": "ByteLevel",
2354
+ "add_prefix_space": true,
2355
+ "trim_offsets": false,
2356
+ "use_regex": true
2357
+ },
2358
+ {
2359
+ "type": "TemplateProcessing",
2360
+ "single": [
2361
+ {
2362
+ "SpecialToken": {
2363
+ "id": "<|begin_of_text|>",
2364
+ "type_id": 0
2365
+ }
2366
+ },
2367
+ {
2368
+ "Sequence": {
2369
+ "id": "A",
2370
+ "type_id": 0
2371
+ }
2372
+ }
2373
+ ],
2374
+ "pair": [
2375
+ {
2376
+ "SpecialToken": {
2377
+ "id": "<|begin_of_text|>",
2378
+ "type_id": 0
2379
+ }
2380
+ },
2381
+ {
2382
+ "Sequence": {
2383
+ "id": "A",
2384
+ "type_id": 0
2385
+ }
2386
+ },
2387
+ {
2388
+ "SpecialToken": {
2389
+ "id": "<|begin_of_text|>",
2390
+ "type_id": 1
2391
+ }
2392
+ },
2393
+ {
2394
+ "Sequence": {
2395
+ "id": "B",
2396
+ "type_id": 1
2397
+ }
2398
+ }
2399
+ ],
2400
+ "special_tokens": {
2401
+ "<|begin_of_text|>": {
2402
+ "id": "<|begin_of_text|>",
2403
+ "ids": [
2404
+ 128000
2405
+ ],
2406
+ "tokens": [
2407
+ "<|begin_of_text|>"
2408
+ ]
2409
+ }
2410
+ }
2411
+ }
2412
+ ]
2413
  },
2414
  "decoder": {
2415
  "type": "ByteLevel",