alexmarques commited on
Commit
1455f0f
1 Parent(s): 8f210e8

Upload tokenizer.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. tokenizer.json +64 -10
tokenizer.json CHANGED
@@ -1,11 +1,6 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 4096,
6
- "strategy": "LongestFirst",
7
- "stride": 0
8
- },
9
  "padding": null,
10
  "added_tokens": [
11
  {
@@ -2334,10 +2329,69 @@
2334
  ]
2335
  },
2336
  "post_processor": {
2337
- "type": "ByteLevel",
2338
- "add_prefix_space": true,
2339
- "trim_offsets": false,
2340
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2341
  },
2342
  "decoder": {
2343
  "type": "ByteLevel",
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
2329
  ]
2330
  },
2331
  "post_processor": {
2332
+ "type": "Sequence",
2333
+ "processors": [
2334
+ {
2335
+ "type": "ByteLevel",
2336
+ "add_prefix_space": true,
2337
+ "trim_offsets": false,
2338
+ "use_regex": true
2339
+ },
2340
+ {
2341
+ "type": "TemplateProcessing",
2342
+ "single": [
2343
+ {
2344
+ "SpecialToken": {
2345
+ "id": "<|begin_of_text|>",
2346
+ "type_id": 0
2347
+ }
2348
+ },
2349
+ {
2350
+ "Sequence": {
2351
+ "id": "A",
2352
+ "type_id": 0
2353
+ }
2354
+ }
2355
+ ],
2356
+ "pair": [
2357
+ {
2358
+ "SpecialToken": {
2359
+ "id": "<|begin_of_text|>",
2360
+ "type_id": 0
2361
+ }
2362
+ },
2363
+ {
2364
+ "Sequence": {
2365
+ "id": "A",
2366
+ "type_id": 0
2367
+ }
2368
+ },
2369
+ {
2370
+ "SpecialToken": {
2371
+ "id": "<|begin_of_text|>",
2372
+ "type_id": 1
2373
+ }
2374
+ },
2375
+ {
2376
+ "Sequence": {
2377
+ "id": "B",
2378
+ "type_id": 1
2379
+ }
2380
+ }
2381
+ ],
2382
+ "special_tokens": {
2383
+ "<|begin_of_text|>": {
2384
+ "id": "<|begin_of_text|>",
2385
+ "ids": [
2386
+ 128000
2387
+ ],
2388
+ "tokens": [
2389
+ "<|begin_of_text|>"
2390
+ ]
2391
+ }
2392
+ }
2393
+ }
2394
+ ]
2395
  },
2396
  "decoder": {
2397
  "type": "ByteLevel",