|
{ |
|
"version": "1.0", |
|
"truncation": { |
|
"direction": "Right", |
|
"max_length": 20, |
|
"strategy": "LongestFirst", |
|
"stride": 0 |
|
}, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<[|endoftext|>]", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 1, |
|
"content": "...", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 2, |
|
"content": "\n", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
}, |
|
{ |
|
"id": 230, |
|
"content": "<|endoftext|>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": false, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"post_processor": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": true, |
|
"trim_offsets": false, |
|
"use_regex": true |
|
}, |
|
"decoder": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": true, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": null, |
|
"continuing_subword_prefix": null, |
|
"end_of_word_suffix": null, |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"vocab": { |
|
"<[|endoftext|>]": 0, |
|
"...": 1, |
|
"\n": 2, |
|
"A": 3, |
|
"B": 4, |
|
"C": 5, |
|
"D": 6, |
|
"Ċ": 7, |
|
"BB": 8, |
|
"AA": 9, |
|
"DD": 10, |
|
"CC": 11, |
|
"BBBB": 12, |
|
"AAAA": 13, |
|
"DDDD": 14, |
|
"CCCC": 15, |
|
"BBBBBBBB": 16, |
|
"AAAAAAAA": 17, |
|
"DDDDDDDD": 18, |
|
"CCCCCCCC": 19, |
|
"BBB": 20, |
|
"AAA": 21, |
|
"CCC": 22, |
|
"DDD": 23, |
|
"BBBBB": 24, |
|
"AAAAA": 25, |
|
"DDDDD": 26, |
|
"CCCCC": 27, |
|
"BBBBBB": 28, |
|
"AAAAAA": 29, |
|
"DDDDDD": 30, |
|
"CCCCCC": 31, |
|
"BBBBBBB": 32, |
|
"BBBBBBBBBBBBBBBB": 33, |
|
"AAAAAAA": 34, |
|
"AAAAAAAAAAAAAAAA": 35, |
|
"DDDDDDD": 36, |
|
"CCCCCCC": 37, |
|
"DDDDDDDDDDDDDDDD": 38, |
|
"CCCCCCCCCCCCCCCC": 39, |
|
"BBBBBBBBB": 40, |
|
"AAAAAAAAA": 41, |
|
"DDDDDDDDD": 42, |
|
"BBBBBBBBBB": 43, |
|
"AAAAAAAAAA": 44, |
|
"CCCCCCCCC": 45, |
|
"BBBBBBBBBBB": 46, |
|
"AAAAAAAAAAA": 47, |
|
"DDDDDDDDDD": 48, |
|
"CCCCCCCCCC": 49, |
|
"BBBBBBBBBBBB": 50, |
|
"DDDDDDDDDDD": 51, |
|
"AAAAAAAAAAAA": 52, |
|
"CCCCCCCCCCC": 53, |
|
"BBBAAA": 54, |
|
"BBBBBBBBBBBBB": 55, |
|
"AAAAAAAAAAAAA": 56, |
|
"BBBCCC": 57, |
|
"DDDDDDDDDDDD": 58, |
|
"BBBDDD": 59, |
|
"CCCCCCCCCCCC": 60, |
|
"AAACCC": 61, |
|
"AAADDD": 62, |
|
"AAAAAAAAAAAAAA": 63, |
|
"AAAABBBB": 64, |
|
"BBBBBBBBBBBBBB": 65, |
|
"DDDDDDDDDDDDD": 66, |
|
"CCCCCCCCCCCCC": 67, |
|
"BBBBBBBBBBBBBBB": 68, |
|
"AAABBB": 69, |
|
"DDDCCC": 70, |
|
"BBBBAAAA": 71, |
|
"AAAAAAAAAAAAAAA": 72, |
|
"BBBBCCCC": 73, |
|
"DDDDDDDDDDDDDD": 74, |
|
"BBBBDDDD": 75, |
|
"BBBBCCC": 76, |
|
"AAAACCCC": 77, |
|
"AAAADDDD": 78, |
|
"AAAACCC": 79, |
|
"BBBCCCC": 80, |
|
"CCCCCCCCCCCCCC": 81, |
|
"BBBDDDD": 82, |
|
"AAADDDD": 83, |
|
"AAAADDD": 84, |
|
"BBBBDDD": 85, |
|
"AAACCCC": 86, |
|
"DDDDDDDDDDDDDDD": 87, |
|
"CCCCCCCCCCCCCCC": 88, |
|
"BBBBBBBBBBBBBBBBBBBBBBBBBBBBBBBB": 89, |
|
"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA": 90, |
|
"BBBBAAAAA": 91, |
|
"BBBAA": 92, |
|
"AAAABBBBB": 93, |
|
"CCCDDD": 94, |
|
"BBBBAAA": 95, |
|
"BBBCC": 96, |
|
"BBBBBBBBBBBBBBBBB": 97, |
|
"BBBBBAAAAA": 98, |
|
"AAAAAAAAAAAAAAAAA": 99, |
|
"BBBDD": 100, |
|
"AAABB": 101, |
|
"BBBBBCCCC": 102, |
|
"DDDDCCCC": 103, |
|
"BBBBDDDDD": 104, |
|
"BBBAAAAA": 105, |
|
"BBBBCCCCC": 106, |
|
"BBBAAAA": 107, |
|
"AAACC": 108, |
|
"DDDDCCC": 109, |
|
"AAADD": 110, |
|
"BBBBBBBBBBBBBBBBBB": 111, |
|
"AAAAACCCC": 112, |
|
"AAAACCCCC": 113, |
|
"AAAAAAAAAAAAAAAAAA": 114, |
|
"BBCCC": 115, |
|
"AAAADDDDD": 116, |
|
"BBBBBDDDD": 117, |
|
"BBBCCCCC": 118, |
|
"BBBBBCCC": 119, |
|
"DDDDDDDDDDDDDDDDDDDDDDDDDDDDDDDD": 120, |
|
"BBBBBDDDDD": 121, |
|
"DDDCCCC": 122, |
|
"AAAABBBBBB": 123, |
|
"BBBBAAAAAA": 124, |
|
"CCCCCCCCCCCCCCCCC": 125, |
|
"BBBDDDDD": 126, |
|
"BBBBBCCCCC": 127, |
|
"BBDDD": 128, |
|
"DDDDDDDDDDDDDDDDD": 129, |
|
"AADDD": 130, |
|
"BBBBBDDD": 131, |
|
"AAAAADDDD": 132, |
|
"AAABBBBBB": 133, |
|
"AACCC": 134, |
|
"BBBAAAAAA": 135, |
|
"AAAAACCCCC": 136, |
|
"DDDDDCCCC": 137, |
|
"BBBBBAAAAAA": 138, |
|
"AAAAACCC": 139, |
|
"AAACCCCC": 140, |
|
"CCCCCCCCCCCCCCCCCC": 141, |
|
"DDDDDDDDDDDDDDDDDD": 142, |
|
"AAADDDDD": 143, |
|
"AAAAADDDDD": 144, |
|
"AAAADDDDDD": 145, |
|
"BBBBDDDDDD": 146, |
|
"BBBBBBCCCC": 147, |
|
"DDCCC": 148, |
|
"CCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCC": 149, |
|
"BBBBCCCCCC": 150, |
|
"AAAAAACCCC": 151, |
|
"BBBBBBDDDD": 152, |
|
"AAAABBBBBBB": 153, |
|
"BBBCCCCCC": 154, |
|
"BBBBBBDDDDD": 155, |
|
"BBBBBCCCCCC": 156, |
|
"BBBDDDDDD": 157, |
|
"AAAACCCCCC": 158, |
|
"BBCC": 159, |
|
"CCCCDDD": 160, |
|
"AAAAADDD": 161, |
|
"BBBBBDDDDDD": 162, |
|
"BBBBBBCCC": 163, |
|
"DDDDCCCCC": 164, |
|
"BBBBBBCCCCC": 165, |
|
"BBBBAAAAAAA": 166, |
|
"AAACCCCCC": 167, |
|
"AAABBBBBBB": 168, |
|
"CCDDD": 169, |
|
"BBBBBAAAA": 170, |
|
"BBBBBBDDD": 171, |
|
"AAAAAADDDDD": 172, |
|
"CCCCDDDD": 173, |
|
"AAAAAACCCCC": 174, |
|
"AAAAADDDDDD": 175, |
|
"DDDDDDDDDDDDDDDDDDD": 176, |
|
"BBAA": 177, |
|
"AAABBBB": 178, |
|
"AAABBBBB": 179, |
|
"BBBAAAAAAA": 180, |
|
"AAAAAADDDD": 181, |
|
"CCCCCCCCCCCCCCCCCCC": 182, |
|
"AAAAACCCCCC": 183, |
|
"AAAAABBBBBB": 184, |
|
"AAADDDDDD": 185, |
|
"BBBBBBBCCCC": 186, |
|
"BBDD": 187, |
|
"BBBBBBBCCC": 188, |
|
"CCCCCDDDD": 189, |
|
"AAAAAACCC": 190, |
|
"BBBBBAAAAAAA": 191, |
|
"BBBDDDDDDD": 192, |
|
"DDDDDCCC": 193, |
|
"DDDDDCCCCC": 194, |
|
"AAAAAADDDDDD": 195, |
|
"AACC": 196, |
|
"BBBBBBBDDDD": 197, |
|
"AAAAAADDD": 198, |
|
"BBBBCCCCCCC": 199, |
|
"AAAABBBBBBBB": 200, |
|
"BBBCCCCCCC": 201, |
|
"BBBBBDDDDDDD": 202, |
|
"BBBBDDDDDDD": 203, |
|
"BBBBBBCCCCCC": 204, |
|
"AAAACCCCCCC": 205, |
|
"AAABBBBBBBB": 206, |
|
"BBCCCC": 207, |
|
"AAAADDDDDDD": 208, |
|
"BBBAAAAAAAA": 209, |
|
"BBBBBBDDDDDD": 210, |
|
"BBBBAAAAAAAA": 211, |
|
"AAAAAACCCCCC": 212, |
|
"BBBBBBAAAAA": 213, |
|
"BBBBCC": 214, |
|
"AAAAAAACCCC": 215, |
|
"DDDDDDDDDDDDDDDDDDDD": 216, |
|
"AAACCCCCCC": 217, |
|
"BBBBBBBDDD": 218, |
|
"BBBBBBBCCCCC": 219, |
|
"AADD": 220, |
|
"AAAAAAACCCCC": 221, |
|
"BBBBBBBBCCCC": 222, |
|
"BBDDDD": 223, |
|
"AAAAAAACCC": 224, |
|
"BBBBDD": 225, |
|
"AAADDDDDDD": 226, |
|
"AADDDD": 227, |
|
"AAAAADDDDDDD": 228, |
|
"BBBBBBBBCCC": 229 |
|
}, |
|
"merges": [ |
|
"B B", |
|
"A A", |
|
"D D", |
|
"C C", |
|
"BB BB", |
|
"AA AA", |
|
"DD DD", |
|
"CC CC", |
|
"BBBB BBBB", |
|
"AAAA AAAA", |
|
"DDDD DDDD", |
|
"CCCC CCCC", |
|
"BB B", |
|
"AA A", |
|
"CC C", |
|
"DD D", |
|
"BBBB B", |
|
"AAAA A", |
|
"DDDD D", |
|
"CCCC C", |
|
"BBBB BB", |
|
"AAAA AA", |
|
"DDDD DD", |
|
"CCCC CC", |
|
"BBBB BBB", |
|
"BBBBBBBB BBBBBBBB", |
|
"AAAA AAA", |
|
"AAAAAAAA AAAAAAAA", |
|
"DDDD DDD", |
|
"CCCC CCC", |
|
"DDDDDDDD DDDDDDDD", |
|
"CCCCCCCC CCCCCCCC", |
|
"BBBBBBBB B", |
|
"AAAAAAAA A", |
|
"DDDDDDDD D", |
|
"BBBBBBBB BB", |
|
"AAAAAAAA AA", |
|
"CCCCCCCC C", |
|
"BBBBBBBB BBB", |
|
"AAAAAAAA AAA", |
|
"DDDDDDDD DD", |
|
"CCCCCCCC CC", |
|
"BBBBBBBB BBBB", |
|
"DDDDDDDD DDD", |
|
"AAAAAAAA AAAA", |
|
"CCCCCCCC CCC", |
|
"BBB AAA", |
|
"BBBBBBBB BBBBB", |
|
"AAAAAAAA AAAAA", |
|
"BBB CCC", |
|
"DDDDDDDD DDDD", |
|
"BBB DDD", |
|
"CCCCCCCC CCCC", |
|
"AAA CCC", |
|
"AAA DDD", |
|
"AAAAAAAA AAAAAA", |
|
"AAAA BBBB", |
|
"BBBBBBBB BBBBBB", |
|
"DDDDDDDD DDDDD", |
|
"CCCCCCCC CCCCC", |
|
"BBBBBBBB BBBBBBB", |
|
"AAA BBB", |
|
"DDD CCC", |
|
"BBBB AAAA", |
|
"AAAAAAAA AAAAAAA", |
|
"BBBB CCCC", |
|
"DDDDDDDD DDDDDD", |
|
"BBBB DDDD", |
|
"BBBB CCC", |
|
"AAAA CCCC", |
|
"AAAA DDDD", |
|
"AAAA CCC", |
|
"BBB CCCC", |
|
"CCCCCCCC CCCCCC", |
|
"BBB DDDD", |
|
"AAA DDDD", |
|
"AAAA DDD", |
|
"BBBB DDD", |
|
"AAA CCCC", |
|
"DDDDDDDD DDDDDDD", |
|
"CCCCCCCC CCCCCCC", |
|
"BBBBBBBBBBBBBBBB BBBBBBBBBBBBBBBB", |
|
"AAAAAAAAAAAAAAAA AAAAAAAAAAAAAAAA", |
|
"BBBB AAAAA", |
|
"BBB AA", |
|
"AAAA BBBBB", |
|
"CCC DDD", |
|
"BBBB AAA", |
|
"BBB CC", |
|
"BBBBBBBBBBBBBBBB B", |
|
"BBBBB AAAAA", |
|
"AAAAAAAAAAAAAAAA A", |
|
"BBB DD", |
|
"AAA BB", |
|
"BBBBB CCCC", |
|
"DDDD CCCC", |
|
"BBBB DDDDD", |
|
"BBB AAAAA", |
|
"BBBB CCCCC", |
|
"BBB AAAA", |
|
"AAA CC", |
|
"DDDD CCC", |
|
"AAA DD", |
|
"BBBBBBBBBBBBBBBB BB", |
|
"AAAAA CCCC", |
|
"AAAA CCCCC", |
|
"AAAAAAAAAAAAAAAA AA", |
|
"BB CCC", |
|
"AAAA DDDDD", |
|
"BBBBB DDDD", |
|
"BBB CCCCC", |
|
"BBBBB CCC", |
|
"DDDDDDDDDDDDDDDD DDDDDDDDDDDDDDDD", |
|
"BBBBB DDDDD", |
|
"DDD CCCC", |
|
"AAAA BBBBBB", |
|
"BBBB AAAAAA", |
|
"CCCCCCCCCCCCCCCC C", |
|
"BBB DDDDD", |
|
"BBBBB CCCCC", |
|
"BB DDD", |
|
"DDDDDDDDDDDDDDDD D", |
|
"AA DDD", |
|
"BBBBB DDD", |
|
"AAAAA DDDD", |
|
"AAA BBBBBB", |
|
"AA CCC", |
|
"BBB AAAAAA", |
|
"AAAAA CCCCC", |
|
"DDDDD CCCC", |
|
"BBBBB AAAAAA", |
|
"AAAAA CCC", |
|
"AAA CCCCC", |
|
"CCCCCCCCCCCCCCCC CC", |
|
"DDDDDDDDDDDDDDDD DD", |
|
"AAA DDDDD", |
|
"AAAAA DDDDD", |
|
"AAAA DDDDDD", |
|
"BBBB DDDDDD", |
|
"BBBBBB CCCC", |
|
"DD CCC", |
|
"CCCCCCCCCCCCCCCC CCCCCCCCCCCCCCCC", |
|
"BBBB CCCCCC", |
|
"AAAAAA CCCC", |
|
"BBBBBB DDDD", |
|
"AAAA BBBBBBB", |
|
"BBB CCCCCC", |
|
"BBBBBB DDDDD", |
|
"BBBBB CCCCCC", |
|
"BBB DDDDDD", |
|
"AAAA CCCCCC", |
|
"BB CC", |
|
"CCCC DDD", |
|
"AAAAA DDD", |
|
"BBBBB DDDDDD", |
|
"BBBBBB CCC", |
|
"DDDD CCCCC", |
|
"BBBBBB CCCCC", |
|
"BBBB AAAAAAA", |
|
"AAA CCCCCC", |
|
"AAA BBBBBBB", |
|
"CC DDD", |
|
"BBBBB AAAA", |
|
"BBBBBB DDD", |
|
"AAAAAA DDDDD", |
|
"CCCC DDDD", |
|
"AAAAAA CCCCC", |
|
"AAAAA DDDDDD", |
|
"DDDDDDDDDDDDDDDD DDD", |
|
"BB AA", |
|
"AAA BBBB", |
|
"AAA BBBBB", |
|
"BBB AAAAAAA", |
|
"AAAAAA DDDD", |
|
"CCCCCCCCCCCCCCCC CCC", |
|
"AAAAA CCCCCC", |
|
"AAAAA BBBBBB", |
|
"AAA DDDDDD", |
|
"BBBBBBB CCCC", |
|
"BB DD", |
|
"BBBBBBB CCC", |
|
"CCCCC DDDD", |
|
"AAAAAA CCC", |
|
"BBBBB AAAAAAA", |
|
"BBB DDDDDDD", |
|
"DDDDD CCC", |
|
"DDDDD CCCCC", |
|
"AAAAAA DDDDDD", |
|
"AA CC", |
|
"BBBBBBB DDDD", |
|
"AAAAAA DDD", |
|
"BBBB CCCCCCC", |
|
"AAAA BBBBBBBB", |
|
"BBB CCCCCCC", |
|
"BBBBB DDDDDDD", |
|
"BBBB DDDDDDD", |
|
"BBBBBB CCCCCC", |
|
"AAAA CCCCCCC", |
|
"AAA BBBBBBBB", |
|
"BB CCCC", |
|
"AAAA DDDDDDD", |
|
"BBB AAAAAAAA", |
|
"BBBBBB DDDDDD", |
|
"BBBB AAAAAAAA", |
|
"AAAAAA CCCCCC", |
|
"BBBBBB AAAAA", |
|
"BBBB CC", |
|
"AAAAAAA CCCC", |
|
"DDDDDDDDDDDDDDDD DDDD", |
|
"AAA CCCCCCC", |
|
"BBBBBBB DDD", |
|
"BBBBBBB CCCCC", |
|
"AA DD", |
|
"AAAAAAA CCCCC", |
|
"BBBBBBBB CCCC", |
|
"BB DDDD", |
|
"AAAAAAA CCC", |
|
"BBBB DD", |
|
"AAA DDDDDDD", |
|
"AA DDDD", |
|
"AAAAA DDDDDDD", |
|
"BBBBBBBB CCC" |
|
] |
|
} |
|
} |