{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 1, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 2, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 3, "content": "", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false, "special": true }, { "id": 4, "content": "", "single_word": false, "lstrip": true, "rstrip": false, "normalized": false, "special": true } ], "normalizer": null, "pre_tokenizer": { "type": "ByteLevel", "add_prefix_space": false, "trim_offsets": true, "use_regex": true }, "post_processor": { "type": "RobertaProcessing", "sep": [ "", 2 ], "cls": [ "", 0 ], "trim_offsets": true, "add_prefix_space": false }, "decoder": { "type": "ByteLevel", "add_prefix_space": true, "trim_offsets": true, "use_regex": true }, "model": { "type": "BPE", "dropout": null, "unk_token": null, "continuing_subword_prefix": "", "end_of_word_suffix": "", "fuse_unk": false, "byte_fallback": false, "vocab": { "": 0, "": 1, "": 2, "": 3, "": 4, "!": 5, "\"": 6, "#": 7, "$": 8, "%": 9, "&": 10, "'": 11, "(": 12, ")": 13, "*": 14, "+": 15, ",": 16, "-": 17, ".": 18, "/": 19, "0": 20, "1": 21, "2": 22, "3": 23, "4": 24, "5": 25, "6": 26, "7": 27, "8": 28, "9": 29, ":": 30, ";": 31, "<": 32, "=": 33, ">": 34, "?": 35, "@": 36, "A": 37, "B": 38, "C": 39, "D": 40, "E": 41, "F": 42, "G": 43, "H": 44, "I": 45, "J": 46, "K": 47, "L": 48, "M": 49, "N": 50, "O": 51, "P": 52, "Q": 53, "R": 54, "S": 55, "T": 56, "U": 57, "V": 58, "W": 59, "X": 60, "Y": 61, "Z": 62, "[": 63, "\\": 64, "]": 65, "^": 66, "_": 67, "`": 68, "a": 69, "b": 70, "c": 71, "d": 72, "e": 73, "f": 74, "g": 75, "h": 76, "i": 77, "j": 78, "k": 79, "l": 80, "m": 81, "n": 82, "o": 83, "p": 84, "q": 85, "r": 86, "s": 87, "t": 88, "u": 89, "v": 90, "w": 91, "x": 92, "y": 93, "z": 94, "{": 95, "|": 96, "}": 97, "~": 98, "¡": 99, "¢": 100, "£": 101, "¤": 102, "¥": 103, "¦": 104, "§": 105, "¨": 106, "©": 107, "ª": 108, "«": 109, "¬": 110, "®": 111, "¯": 112, "°": 113, "±": 114, "²": 115, "³": 116, "´": 117, "µ": 118, "¶": 119, "·": 120, "¸": 121, "¹": 122, "º": 123, "»": 124, "¼": 125, "½": 126, "¾": 127, "¿": 128, "À": 129, "Á": 130, "Â": 131, "Ã": 132, "Ä": 133, "Å": 134, "Æ": 135, "Ç": 136, "È": 137, "É": 138, "Ê": 139, "Ë": 140, "Ì": 141, "Í": 142, "Î": 143, "Ï": 144, "Ð": 145, "Ñ": 146, "Ò": 147, "Ó": 148, "Ô": 149, "Õ": 150, "Ö": 151, "×": 152, "Ø": 153, "Ù": 154, "Ú": 155, "Û": 156, "Ü": 157, "Ý": 158, "Þ": 159, "ß": 160, "à": 161, "á": 162, "â": 163, "ã": 164, "ä": 165, "å": 166, "æ": 167, "ç": 168, "è": 169, "é": 170, "ê": 171, "ë": 172, "ì": 173, "í": 174, "î": 175, "ï": 176, "ð": 177, "ñ": 178, "ò": 179, "ó": 180, "ô": 181, "õ": 182, "ö": 183, "÷": 184, "ø": 185, "ù": 186, "ú": 187, "û": 188, "ü": 189, "ý": 190, "þ": 191, "ÿ": 192, "Ā": 193, "ā": 194, "Ă": 195, "ă": 196, "Ą": 197, "ą": 198, "Ć": 199, "ć": 200, "Ĉ": 201, "ĉ": 202, "Ċ": 203, "ċ": 204, "Č": 205, "č": 206, "Ď": 207, "ď": 208, "Đ": 209, "đ": 210, "Ē": 211, "ē": 212, "Ĕ": 213, "ĕ": 214, "Ė": 215, "ė": 216, "Ę": 217, "ę": 218, "Ě": 219, "ě": 220, "Ĝ": 221, "ĝ": 222, "Ğ": 223, "ğ": 224, "Ġ": 225, "ġ": 226, "Ģ": 227, "ģ": 228, "Ĥ": 229, "ĥ": 230, "Ħ": 231, "ħ": 232, "Ĩ": 233, "ĩ": 234, "Ī": 235, "ī": 236, "Ĭ": 237, "ĭ": 238, "Į": 239, "į": 240, "İ": 241, "ı": 242, "IJ": 243, "ij": 244, "Ĵ": 245, "ĵ": 246, "Ķ": 247, "ķ": 248, "ĸ": 249, "Ĺ": 250, "ĺ": 251, "Ļ": 252, "ļ": 253, "Ľ": 254, "ľ": 255, "Ŀ": 256, "ŀ": 257, "Ł": 258, "ł": 259, "Ń": 260, "cc": 261, "CC": 262, "(=": 263, "ccc": 264, "](": 265, "@@": 266, "Cc": 267, "NC": 268, "ccccc": 269, "nc": 270, "CCC": 271, ")[": 272, "NH": 273, "+]": 274, "CO": 275, "cccc": 276, "Nc": 277, "Cl": 278, "OC": 279, "CCN": 280, ")(": 281, "COc": 282, "(-": 283, "([": 284, "CCCC": 285, "CN": 286, ")(=": 287, "-]": 288, "CCO": 289, "nH": 290, "nn": 291, "-])": 292, "+](": 293, "CCc": 294, ")=": 295, "sc": 296, "CS": 297, "ncc": 298, "Br": 299, "CNC": 300, "nnc": 301, "NCc": 302, "oc": 303, "12": 304, "CCCCC": 305, "+](=": 306, "COC": 307, "Cn": 308, "21": 309, "CCCN": 310, "cn": 311, "Oc": 312, "CCOC": 313, "+][": 314, "CCOCC": 315, "cnc": 316, "CCS": 317, "]([": 318, "CCOc": 319, "cccs": 320, "cccnc": 321, "NCC": 322, "OCC": 323, "(/": 324, "CCCO": 325, "@]": 326, "CSc": 327, "ccco": 328, "@@]": 329, "cnn": 330, "CCn": 331, "CCNC": 332, "32": 333, "ccccn": 334, "23": 335, "no": 336, "+])": 337, "noc": 338, ")/": 339, "csc": 340, "cs": 341, "ccncc": 342, "cccn": 343, "CCCc": 344, "Sc": 345, "SCC": 346, "ccnc": 347, "SC": 348, "OCc": 349, "ccsc": 350, "ccn": 351, "NNC": 352, "OCO": 353, "@](": 354, "ncnc": 355, "NS": 356, "NCCc": 357, "@@](": 358, "CNc": 359, "OCCO": 360, "=[": 361, "ncccc": 362, "cncc": 363, "NN": 364, "CCCCCC": 365, "NCCC": 366, "on": 367, "+]([": 368, "CCCCN": 369, "CCCNC": 370, "ncn": 371, "-])[": 372, "nccs": 373, "+]=": 374, "CSC": 375, "CCCn": 376, "sccc": 377, "SCc": 378, "CCSc": 379, "cncn": 380, "(\\": 381, "COCC": 382, "34": 383, "nnnn": 384, "nccc": 385, "COCc": 386, "ccoc": 387, ")([": 388, "nccn": 389, "CCCOc": 390, "ncccn": 391, "ccnn": 392, "CNS": 393, "@@](=": 394, "CCSC": 395, "43": 396, "COCCN": 397, "Fc": 398, "CCSCC": 399, "CCCS": 400, "cnccn": 401, "-])=": 402, "/[": 403, "@](=": 404, "coc": 405, "cnnc": 406, "CSCC": 407, "nnn": 408, "NNc": 409, "nnnc": 410, "NCCN": 411, "nncn": 412, "+])[": 413, "CCCCO": 414, "ncnn": 415, "CCl": 416, "CCNc": 417, "Clc": 418, "CSCc": 419, "OCCCO": 420, "cnnn": 421, "NCCNC": 422, "occc": 423, "onc": 424, "CCNS": 425, "OCCC": 426, ")=[": 427, "CCCOC": 428, "CCCCNC": 429, "COCCn": 430, "nccnc": 431, "FC": 432, "CCCCCCC": 433, "cscc": 434, "13": 435, "NO": 436, "cnccc": 437, "(\\[": 438, "OCCN": 439, "ns": 440, "nsc": 441, "COCCNC": 442, "NCCOc": 443, "CCCCn": 444, "cscn": 445, "NCCn": 446, "NCCCn": 447, "+])(": 448, "NCCCc": 449, "CCCCc": 450, "ccon": 451, "scnc": 452, "CCCNc": 453, "ncsc": 454, "Nn": 455, "NCCCN": 456, "nonc": 457, "Brc": 458, "NCCS": 459, "SCCC": 460, "COCCO": 461, "nncs": 462, "CNCc": 463, "scc": 464, "sccn": 465, "NCCCC": 466, "(=[": 467, "COCCOc": 468, "31": 469, "ocnc": 470, "nsnc": 471, "OCCc": 472, "CCOCc": 473, "OS": 474, "ncoc": 475, "cncnc": 476, "COCCC": 477, "NCCNc": 478, "CCCl": 479, "nnsc": 480, "CSCCS": 481, "COCCCNC": 482, "SCCc": 483, "co": 484, "CCCCS": 485, "OCCNC": 486, "NCCO": 487, "CCCSc": 488, "+]\\": 489, "-])/": 490, "CSCN": 491, "CCCCOc": 492, "\\[": 493, "CCCCCN": 494, "COCCCN": 495, "CSCCO": 496, ")-": 497, "OCCOc": 498, "cccnn": 499, "CON": 500, "CCOCCN": 501, "NCCCO": 502, "SCCN": 503, "+]/": 504, "CCCSCC": 505, "OCCn": 506, "csnn": 507, "@]([": 508, "CBr": 509, "45": 510, "ccno": 511, "OCCCC": 512, "](/": 513, ")(/": 514, "CSCCC": 515, "snc": 516, "@@]([": 517, "COCCc": 518, "ON": 519, "CCCSC": 520, "24": 521, "+])([": 522, "OCOC": 523, "ccncn": 524, "NCCSc": 525, "CCCOCC": 526, "CCNCC": 527, "nncc": 528, "CCCCCNC": 529, "(/[": 530, "54": 531, "ncon": 532, "COCCCC": 533, "NCCOC": 534, "CCCCOC": 535, "CSCCN": 536, "ncco": 537, "CCSCc": 538, "nnco": 539, "CONC": 540, "snnc": 541, "COCCOC": 542, "42": 543, "ncncc": 544, "CCNCc": 545, "NCCOCC": 546, "NNS": 547, "OCCCN": 548, "NOCc": 549, "OCCCc": 550, ")\\": 551, "NCCCCn": 552, "OCCCNC": 553, "SCCOc": 554, "CCCF": 555, "CCCNS": 556, "OCCNc": 557, "CCCCl": 558, "CCOCCO": 559, "NCCCOC": 560, "OCCS": 561, "SCCS": 562, "occ": 563, "CNCCc": 564, "CCCCCn": 565, "COCCCn": 566, "CNCC": 567, "CCOCCCNC": 568, "NCCCNC": 569, "ClC": 570, "-])(": 571, "-])=[": 572, "OCCOC": 573, "cnoc": 574, "cncs": 575, "NCN": 576, "CCCCSc": 577, "CNn": 578, "CCOCCC": 579, "NCCCSc": 580, "SCCO": 581, "CCCCNc": 582, "CCONC": 583, "NOC": 584, "COCO": 585, "CNN": 586, "CNCCN": 587, "-][": 588, "CCOCCNC": 589, "NCCCS": 590, "NCCCOc": 591, "NCCCCC": 592, "OCCSc": 593, "]/": 594, "OCn": 595, "CSCCNC": 596, "NOCC": 597, "OCCCn": 598, "ccs": 599, "+]=[": 600, "OCN": 601, "(-[": 602, "CCOCCCC": 603, "CCCCCO": 604, "CCOCCS": 605, "CCOCCn": 606, "CCSCCC": 607, "COCCNc": 608, "NCCNS": 609, "SCCCS": 610, "sn": 611, "ccnnc": 612, "CCBr": 613, "COCCOCC": 614, "COCCSc": 615, "ClCc": 616, "CCCCCc": 617, "-]/": 618, "CSCCCNC": 619, "cnco": 620, "cnns": 621, ")/[": 622, "COCCNS": 623, "OCCCOc": 624, "][": 625, "COCCCOc": 626, "CSCCCN": 627, "NCCCCN": 628, "COCCS": 629, "COCCCNc": 630, "CCOCCOc": 631, "CCCCCS": 632, "NCCSC": 633, "OP": 634, "SCCCC": 635, "COn": 636, "COCCCOC": 637, "CCCCOCC": 638, "CCCCCCNC": 639, "CSCCc": 640, "CSCCn": 641, "BrCc": 642, "NCCCNS": 643, "35": 644, "SCCn": 645, "oncc": 646, "COP": 647, "ClCC": 648, "-]=[": 649, "CCOP": 650, "CSCCCCNC": 651, "cnsn": 652, "OCCOCCOCCO": 653, "NCCCNc": 654, "OCCCl": 655, "OCCCSc": 656, "CI": 657, "NCCCCCC": 658, "NCCSCC": 659, "NCCOCc": 660, "OCCCCC": 661, "OCCNS": 662, "OCCSCc": 663, "SCN": 664, "SCn": 665, "SCCNC": 666, "SCCCOc": 667, "conc": 668, "COCOc": 669, "CCCCCCCCCCC": 670, "CCOCCOC": 671, "CCOCCCc": 672, "OCCCCN": 673, "CCCOCc": 674, "NCCCOCC": 675, "nscc": 676, "NCCCCl": 677, "NCCCCOc": 678, "41": 679, "CH": 680, "NSC": 681, "OH": 682, "On": 683, "OCCOCC": 684, "OCCSCC": 685, "OCCCS": 686, "SN": 687, "SCCCc": 688, "](/[": 689, "CCCSCc": 690, "COS": 691, "COCCCCC": 692, "CCCCCCCC": 693, "CCCCCCO": 694, "CCCCCCS": 695, "CNCCC": 696, "-])([": 697, "CCOCCCN": 698, "CCOCCCNc": 699, "CCOCCOCc": 700, "+](-": 701, "CSCCOc": 702, "CSCCSC": 703, "CCCNCC": 704, "cnsc": 705, "CCOCCNc": 706, "CCSS": 707, "CCSCCOC": 708, "OCCF": 709, "OCCBr": 710, "NCCSCc": 711, "(#": 712, "-[": 713, "312": 714, "FCCC": 715, "IC": 716, "Ic": 717, "NOc": 718, "ONC": 719, "OCCSC": 720, "OCCCCn": 721, "OCCOCCS": 722, "SH": 723, "SCCOC": 724, "SCCCO": 725, "SCCSc": 726, "ssc": 727, "ssnc": 728, "CCCBr": 729, "CCCNCc": 730, "COCCCCN": 731, "COCCCS": 732, "COCCSCc": 733, "COCCOCCNC": 734, "ClCCCSc": 735, "CCCCCCN": 736, "CCCCSC": 737, "CCCCOCc": 738, "CCOCCOCC": 739, "CCOCCCn": 740, "nnccc": 741, "-])\\": 742, "+](/": 743, "+](/[": 744, "CSCCCC": 745, "CSCCOC": 746, "CSCCCNc": 747, "BrCC": 748, "BrCCC": 749, "123": 750, "CCCCCOC": 751, "CCCCCOc": 752, "CCCCCSc": 753, "CCOCCSc": 754, "CCSCCOc": 755, "NNN": 756, "NCCCSC": 757, "CCCCNS": 758, "COCCNCc": 759, "OCCCNc": 760, "OCCCSC": 761, "NOCCc": 762, "COCCOCCN": 763, "NCCOCCO": 764, "CCOCCOCCOCC": 765, "OCCOCCOCCOCCO": 766 }, "merges": [ "c c", "C C", "( =", "cc c", "] (", "@ @", "C c", "N C", "cc ccc", "n c", "CC C", ") [", "N H", "+ ]", "C O", "cc cc", "N c", "C l", "O C", "CC N", ") (", "CO c", "( -", "( [", "CC CC", "C N", ") (=", "- ]", "CC O", "n H", "n n", "-] )", "+ ](", "CC c", ") =", "s c", "C S", "n cc", "B r", "C NC", "n nc", "N Cc", "o c", "1 2", "CC CCC", "+] (=", "CO C", "C n", "2 1", "CCC N", "c n", "O c", "CC OC", "+] [", "CCO CC", "c nc", "CC S", "]( [", "CCO c", "ccc s", "ccc nc", "N CC", "O CC", "( /", "CCC O", "@ ]", "CS c", "ccc o", "@@ ]", "c nn", "CC n", "CC NC", "3 2", "cccc n", "2 3", "n o", "+] )", "n oc", ") /", "c sc", "c s", "cc ncc", "ccc n", "CC Cc", "S c", "S CC", "cc nc", "S C", "O Cc", "cc sc", "cc n", "N NC", "O CO", "@ ](", "nc nc", "N S", "N CCc", "@@ ](", "C Nc", "O CCO", "= [", "n cccc", "c ncc", "N N", "CCCC CC", "N CCC", "o n", "+]( [", "CC CCN", "CCC NC", "nc n", "-] )[", "ncc s", "+] =", "CS C", "CCC n", "s ccc", "S Cc", "CCS c", "cnc n", "( \\", "CO CC", "3 4", "nn nn", "n ccc", "CO Cc", "cc oc", ")( [", "ncc n", "CCC Oc", "n cccn", "cc nn", "CN S", "@@] (=", "CCS C", "4 3", "CO CCN", "F c", "CCS CC", "CCC S", "cncc n", "-]) =", "/ [", "@] (=", "c oc", "c nnc", "CS CC", "nn n", "N Nc", "nn nc", "N CCN", "nnc n", "+] )[", "CCCC O", "nc nn", "CC l", "CC Nc", "Cl c", "CS Cc", "O CCCO", "cnn n", "NCC NC", "o ccc", "o nc", "CCN S", "O CCC", ")= [", "CCC OC", "CCCC NC", "CO CCn", "ncc nc", "F C", "CCCC CCC", "cs cc", "1 3", "N O", "cn ccc", "(\\ [", "O CCN", "n s", "n sc", "CO CCNC", "N CCOc", "CCCC n", "csc n", "NCC n", "NCCC n", "+] )(", "NCC Cc", "CCCC c", "cc on", "sc nc", "CCC Nc", "nc sc", "N n", "N CCCN", "no nc", "Br c", "N CCS", "S CCC", "CO CCO", "nnc s", "CN Cc", "s cc", "s ccn", "N CCCC", "(= [", "CO CCOc", "3 1", "oc nc", "ns nc", "O CCc", "CCO Cc", "O S", "nc oc", "cnc nc", "CO CCC", "NCC Nc", "CCC l", "nn sc", "CS CCS", "CO CCCNC", "S CCc", "c o", "CCCC S", "OCC NC", "N CCO", "CCC Sc", "+] \\", "-]) /", "CS CN", "CCCC Oc", "\\ [", "CCCCC N", "CO CCCN", "CS CCO", ") -", "O CCOc", "ccc nn", "CO N", "CCO CCN", "N CCCO", "S CCN", "+] /", "CCC SCC", "OCC n", "cs nn", "@ ]([", "C Br", "4 5", "cc no", "O CCCC", "]( /", ")( /", "CS CCC", "s nc", "@@ ]([", "CO CCc", "O N", "CCC SC", "2 4", "+] )([", "O COC", "ccnc n", "N CCSc", "CCC OCC", "CCN CC", "nn cc", "CCCCC NC", "(/ [", "5 4", "nc on", "CO CCCC", "N CCOC", "CCCC OC", "CS CCN", "ncc o", "CCS Cc", "nnc o", "CO NC", "s nnc", "CO CCOC", "4 2", "nc ncc", "CCN Cc", "N CCOCC", "N NS", "O CCCN", "N OCc", "OCC Cc", ") \\", "N CCCCn", "O CCCNC", "S CCOc", "CCC F", "CCCN S", "OCC Nc", "CCCC l", "CCO CCO", "NCCC OC", "O CCS", "S CCS", "o cc", "CN CCc", "CCCCC n", "CO CCCn", "CN CC", "CCO CCCNC", "NCCC NC", "Cl C", "-] )(", "-]) =[", "O CCOC", "cn oc", "cnc s", "NC N", "CCCC Sc", "CN n", "CCO CCC", "NCCC Sc", "S CCO", "CCCC Nc", "CCO NC", "N OC", "CO CO", "CN N", "CN CCN", "-] [", "CCOCC NC", "NCCC S", "NCCC Oc", "N CCCCC", "O CCSc", "] /", "OC n", "CS CCNC", "N OCC", "O CCCn", "cc s", "+] =[", "OC N", "(- [", "CCO CCCC", "CCCCC O", "CCOCC S", "CCOCC n", "CCS CCC", "COCC Nc", "NCCN S", "S CCCS", "s n", "cc nnc", "CC Br", "CO CCOCC", "CO CCSc", "Cl Cc", "CCCC Cc", "-] /", "CS CCCNC", "cnc o", "cnn s", ")/ [", "COCCN S", "O CCCOc", "] [", "CO CCCOc", "CS CCCN", "NCC CCN", "CO CCS", "CO CCCNc", "CCO CCOc", "CCCCC S", "N CCSC", "O P", "S CCCC", "CO n", "CO CCCOC", "CCCC OCC", "CCCC CCNC", "CS CCc", "CS CCn", "Br Cc", "NCCCN S", "3 5", "S CCn", "o ncc", "CO P", "Cl CC", "-] =[", "CCO P", "CS CCCCNC", "cn sn", "OCCO CCOCCO", "NCCC Nc", "OCCC l", "OCCC Sc", "C I", "N CCCCCC", "N CCSCC", "N CCOCc", "O CCCCC", "O CCNS", "O CCSCc", "S CN", "S Cn", "S CCNC", "S CCCOc", "c onc", "CO COc", "CCCC CCCCCCC", "CCO CCOC", "CCOCC Cc", "OCC CCN", "CCCO Cc", "NCCC OCC", "ns cc", "NCCCC l", "NCCCC Oc", "4 1", "C H", "N SC", "O H", "O n", "O CCOCC", "O CCSCC", "O CCCS", "S N", "S CCCc", "]( /[", "CCC SCc", "CO S", "CO CCCCC", "CCCC CCCC", "CCCC CCO", "CCCC CCS", "CN CCC", "-] )([", "CCO CCCN", "CCO CCCNc", "CCO CCOCc", "+]( -", "CS CCOc", "CS CCSC", "CCCN CC", "cn sc", "CCOCC Nc", "CCS S", "CCS CCOC", "OCC F", "OCC Br", "NCCS Cc", "( #", "- [", "3 12", "F CCC", "I C", "I c", "N Oc", "O NC", "O CCSC", "O CCCCn", "O CCOCCS", "S H", "S CCOC", "S CCCO", "S CCSc", "s sc", "s snc", "CCC Br", "CCC NCc", "CO CCCCN", "CO CCCS", "CO CCSCc", "CO CCOCCNC", "Cl CCCSc", "CCCC CCN", "CCCC SC", "CCCC OCc", "CCO CCOCC", "CCO CCCn", "nn ccc", "-]) \\", "+]( /", "+]( /[", "CS CCCC", "CS CCOC", "CS CCCNc", "Br CC", "Br CCC", "12 3", "CCCCC OC", "CCCCC Oc", "CCCCC Sc", "CCOCC Sc", "CCS CCOc", "NN N", "NCCC SC", "CCCCN S", "COCCN Cc", "OCCC Nc", "OCCC SC", "NO CCc", "COCCO CCN", "NCCO CCO", "CCOCCO CCOCC", "OCCOCCOCCO CCO" ] } }