Update post-processor
Browse files- tokenizer.json +52 -4
tokenizer.json
CHANGED
@@ -372,10 +372,58 @@
|
|
372 |
"use_regex": true
|
373 |
},
|
374 |
"post_processor": {
|
375 |
-
"type": "
|
376 |
-
"
|
377 |
-
|
378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
379 |
},
|
380 |
"decoder": {
|
381 |
"type": "ByteLevel",
|
|
|
372 |
"use_regex": true
|
373 |
},
|
374 |
"post_processor": {
|
375 |
+
"type": "TemplateProcessing",
|
376 |
+
"single": [
|
377 |
+
{
|
378 |
+
"SpecialToken": {
|
379 |
+
"id": "<|endoftext|>",
|
380 |
+
"type_id": 0
|
381 |
+
}
|
382 |
+
},
|
383 |
+
{
|
384 |
+
"Sequence": {
|
385 |
+
"id": "A",
|
386 |
+
"type_id": 0
|
387 |
+
}
|
388 |
+
}
|
389 |
+
],
|
390 |
+
"pair": [
|
391 |
+
{
|
392 |
+
"SpecialToken": {
|
393 |
+
"id": "<|endoftext|>",
|
394 |
+
"type_id": 0
|
395 |
+
}
|
396 |
+
},
|
397 |
+
{
|
398 |
+
"Sequence": {
|
399 |
+
"id": "A",
|
400 |
+
"type_id": 0
|
401 |
+
}
|
402 |
+
},
|
403 |
+
{
|
404 |
+
"SpecialToken": {
|
405 |
+
"id": "<|endoftext|>",
|
406 |
+
"type_id": 1
|
407 |
+
}
|
408 |
+
},
|
409 |
+
{
|
410 |
+
"Sequence": {
|
411 |
+
"id": "B",
|
412 |
+
"type_id": 1
|
413 |
+
}
|
414 |
+
}
|
415 |
+
],
|
416 |
+
"special_tokens": {
|
417 |
+
"<|endoftext|>": {
|
418 |
+
"id": "<|endoftext|>",
|
419 |
+
"ids": [
|
420 |
+
50256
|
421 |
+
],
|
422 |
+
"tokens": [
|
423 |
+
"<|endoftext|>"
|
424 |
+
]
|
425 |
+
}
|
426 |
+
}
|
427 |
},
|
428 |
"decoder": {
|
429 |
"type": "ByteLevel",
|