davidoj01 committed on
Commit
bc34be7
1 Parent(s): ce2bdec

Upload tokenizer

Browse files
Files changed (1) hide show
  1. tokenizer.json +63 -4
tokenizer.json CHANGED
@@ -2334,10 +2334,69 @@
2334
  ]
2335
  },
2336
  "post_processor": {
2337
- "type": "ByteLevel",
2338
- "add_prefix_space": true,
2339
- "trim_offsets": false,
2340
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2341
  },
2342
  "decoder": {
2343
  "type": "ByteLevel",
 
2334
  ]
2335
  },
2336
  "post_processor": {
2337
+ "type": "Sequence",
2338
+ "processors": [
2339
+ {
2340
+ "type": "ByteLevel",
2341
+ "add_prefix_space": true,
2342
+ "trim_offsets": false,
2343
+ "use_regex": true
2344
+ },
2345
+ {
2346
+ "type": "TemplateProcessing",
2347
+ "single": [
2348
+ {
2349
+ "SpecialToken": {
2350
+ "id": "<|begin_of_text|>",
2351
+ "type_id": 0
2352
+ }
2353
+ },
2354
+ {
2355
+ "Sequence": {
2356
+ "id": "A",
2357
+ "type_id": 0
2358
+ }
2359
+ }
2360
+ ],
2361
+ "pair": [
2362
+ {
2363
+ "SpecialToken": {
2364
+ "id": "<|begin_of_text|>",
2365
+ "type_id": 0
2366
+ }
2367
+ },
2368
+ {
2369
+ "Sequence": {
2370
+ "id": "A",
2371
+ "type_id": 0
2372
+ }
2373
+ },
2374
+ {
2375
+ "SpecialToken": {
2376
+ "id": "<|begin_of_text|>",
2377
+ "type_id": 1
2378
+ }
2379
+ },
2380
+ {
2381
+ "Sequence": {
2382
+ "id": "B",
2383
+ "type_id": 1
2384
+ }
2385
+ }
2386
+ ],
2387
+ "special_tokens": {
2388
+ "<|begin_of_text|>": {
2389
+ "id": "<|begin_of_text|>",
2390
+ "ids": [
2391
+ 128000
2392
+ ],
2393
+ "tokens": [
2394
+ "<|begin_of_text|>"
2395
+ ]
2396
+ }
2397
+ }
2398
+ }
2399
+ ]
2400
  },
2401
  "decoder": {
2402
  "type": "ByteLevel",