| { |
| "version": "1.0", |
| "truncation": null, |
| "padding": null, |
| "added_tokens": [ |
| { |
| "id": 0, |
| "content": "[PAD]", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 1, |
| "content": "[UNK]", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 2, |
| "content": "[CLS]", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 3, |
| "content": "[SEP]", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| }, |
| { |
| "id": 4, |
| "content": "[MASK]", |
| "single_word": false, |
| "lstrip": false, |
| "rstrip": false, |
| "normalized": false, |
| "special": true |
| } |
| ], |
| "normalizer": null, |
| "pre_tokenizer": { |
| "type": "Split", |
| "pattern": { |
| "Regex": "(\\[[^\\]]+]|C\\(=N\\)N|CCC\\(C\\)|\\(CCCN\\)|NC\\(=O\\)|C\\(C\\)=O|=C\\(N\\)N|N=C\\(N\\)|NC\\(=N\\)|C\\(=O\\)C|CS\\(=O\\)|OC\\(=O\\)|C\\(=O\\)c|c\\(=O\\)n|C\\(=O\\)O|C\\(N\\)=O|cc\\(Br\\)|CC\\(=O\\)|C\\(=O\\)N|ccc\\(C\\)|ccc\\(F\\)|c\\(=O\\)|C\\(=N\\)|c\\(O\\)c|NC\\(C\\)|n\\(C\\)c|CC\\(O\\)|cc\\(N\\)|CC\\(C\\)|cc\\(C\\)|C\\(=O\\)|cc\\(O\\)|c\\(N\\)c|c\\(Cl\\)|C\\(N\\)N|N\\(C\\)C|NC\\(N\\)|=C\\(N\\)|C\\(O\\)C|c\\(OC\\)|\\(C#N\\)|C\\(C\\)C|CC\\(N\\)|C\\(C\\)N|c\\(CO\\)|c\\(Br\\)|\\(CCO\\)|C\\(CC\\)|S\\(=O\\)|c\\(C\\)c|\\(=N\\)|c\\(O\\)|\\(Br\\)|\\(CS\\)|c\\(C\\)|\\(CC\\)|c\\(I\\)|C\\(C\\)|N\\(C\\)|C\\(O\\)|C\\(I\\)|C\\(F\\)|\\(Cl\\)|n\\(C\\)|\\(OC\\)|\\(=O\\)|c\\(F\\)|CCCN\\)|\\(=S\\)|c\\(N\\)|\\(CO\\)|C\\(N\\)|\\(C\\)|ccccc|\\(S\\)|\\(F\\)|\\(O\\)|C#N\\)|CCO\\)|\\(N\\)|C\\(=N|\\(I\\)|CSSC|=N\\)|CC=O|CCCO|Cl\\)|CCNO|=O\\)|CCSC|\\(=N|CO\\)|CCNC|CCCC|=S\\)|CN=C|CCCS|cccc|CCCN|Br\\)|cccn|CS\\)|C=CC|OC\\)|CC=C|cnn|=NC|COC|OCC|\\(O|CCS|CNc|#Cc|=CC|ccn|C=C|CSc|ccc|NCc|CCO|N=C|cnc|I\\)|CCc|OCc|CCl|ccs|COc|CCn|CSC|SCC|NCC|CCN|CNC|C#C|C=O|CNO|CCC|SSC|C#N|O=C|NOC|S\\)|csc|ncc|C\\)|N\\)|\\(C|ncn|F\\)|O\\)|N#C|nnc|CSS|cco|Cl|NC|nc|co|CS|CO|no|cc|CN|cn|SS|OC|\\)|SN|nn|CC|#C|NO|=S|NS|cs|=C|Oc|=O|oc|Nc|Cc|=N|NN|C=|C#|\\(|SC|sc|Br|N#|#N|p|O|I|N|C|s|=|c|B|S|F|n|P|#|o)" |
| }, |
| "behavior": "Isolated", |
| "invert": false |
| }, |
| "post_processor": { |
| "type": "TemplateProcessing", |
| "single": [ |
| { |
| "SpecialToken": { |
| "id": "[CLS]", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "[SEP]", |
| "type_id": 0 |
| } |
| } |
| ], |
| "pair": [ |
| { |
| "SpecialToken": { |
| "id": "[CLS]", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "A", |
| "type_id": 0 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "[SEP]", |
| "type_id": 0 |
| } |
| }, |
| { |
| "Sequence": { |
| "id": "B", |
| "type_id": 1 |
| } |
| }, |
| { |
| "SpecialToken": { |
| "id": "[SEP]", |
| "type_id": 1 |
| } |
| } |
| ], |
| "special_tokens": { |
| "[CLS]": { |
| "id": "[CLS]", |
| "ids": [ |
| 2 |
| ], |
| "tokens": [ |
| "[CLS]" |
| ] |
| }, |
| "[SEP]": { |
| "id": "[SEP]", |
| "ids": [ |
| 3 |
| ], |
| "tokens": [ |
| "[SEP]" |
| ] |
| } |
| } |
| }, |
| "decoder": null, |
| "model": { |
| "type": "WordPiece", |
| "unk_token": "[UNK]", |
| "continuing_subword_prefix": "##", |
| "max_input_chars_per_word": 100, |
| "vocab": { |
| "[PAD]": 0, |
| "[UNK]": 1, |
| "[CLS]": 2, |
| "[SEP]": 3, |
| "[MASK]": 4, |
| ":": 5, |
| "%11": 6, |
| "-": 7, |
| "[As]": 8, |
| "[pH]": 9, |
| "[Po]": 10, |
| "[Ra]": 11, |
| "[3H]": 12, |
| "[S-]": 13, |
| "8": 14, |
| "%21": 15, |
| "[CH-]": 16, |
| "[IH]": 17, |
| "P": 18, |
| "[SeH]": 19, |
| "[O]": 20, |
| "4": 21, |
| "/": 22, |
| "[N-]": 23, |
| "[129Xe]": 24, |
| "[Cl+3]": 25, |
| "3": 26, |
| "[C@@]": 27, |
| "[11CH3]": 28, |
| "[13C]": 29, |
| "[Sn+]": 30, |
| "[P@@]": 31, |
| "[Ge]": 32, |
| "[BH3-]": 33, |
| "[123I]": 34, |
| "[14CH2]": 35, |
| "[Al-]": 36, |
| "[Si]": 37, |
| "[S@]": 38, |
| "[W]": 39, |
| "=": 40, |
| "%19": 41, |
| "Cl": 42, |
| "[Cl+2]": 43, |
| "%14": 44, |
| "[Al]": 45, |
| "9": 46, |
| "[B-]": 47, |
| "[Cl+]": 48, |
| "[TlH2]": 49, |
| "[NH2+]": 50, |
| "[11CH]": 51, |
| "[SnH]": 52, |
| "[SiH3]": 53, |
| "[Sn]": 54, |
| "[11C]": 55, |
| "S": 56, |
| "[SiH2]": 57, |
| "%18": 58, |
| "[BH-]": 59, |
| "[Ru]": 60, |
| "%10": 61, |
| "[V]": 62, |
| "[o+]": 63, |
| "[O+]": 64, |
| "c": 65, |
| "[I-]": 66, |
| "[C@@H]": 67, |
| "n": 68, |
| "2": 69, |
| "[Se-]": 70, |
| "[N+]": 71, |
| "N": 72, |
| "s": 73, |
| "[PH+]": 74, |
| "[C@]": 75, |
| "[N@]": 76, |
| "[C+]": 77, |
| "[s+]": 78, |
| "[N@+]": 79, |
| "[125I]": 80, |
| "[cH-]": 81, |
| "[Th]": 82, |
| "C": 83, |
| "[Sb]": 84, |
| "5": 85, |
| "[c-]": 86, |
| "#": 87, |
| "[Ca]": 88, |
| "%16": 89, |
| "[Tl]": 90, |
| "[18F]": 91, |
| "[223Ra]": 92, |
| "[BH2-]": 93, |
| "[O-]": 94, |
| "[Bi]": 95, |
| "[te]": 96, |
| "Br": 97, |
| "[Cr]": 98, |
| "[N@@]": 99, |
| "[Hg]": 100, |
| "[S@+]": 101, |
| "\\": 102, |
| "[n+]": 103, |
| "%15": 104, |
| "[123Te]": 105, |
| "[C-]": 106, |
| "1": 107, |
| "[NH+]": 108, |
| "[I+]": 109, |
| "[CH]": 110, |
| "%13": 111, |
| "[Pb]": 112, |
| "[14C]": 113, |
| "[2H]": 114, |
| "[P@]": 115, |
| "[OH+]": 116, |
| ")": 117, |
| "[Tc]": 118, |
| "[se+]": 119, |
| "[NH-]": 120, |
| "[nH]": 121, |
| "B": 122, |
| "[CH2]": 123, |
| "[P+]": 124, |
| "[se]": 125, |
| "[In]": 126, |
| "[Te]": 127, |
| "[Se+]": 128, |
| "%12": 129, |
| "[S+]": 130, |
| "o": 131, |
| "[C]": 132, |
| "[N@@+]": 133, |
| "[n-]": 134, |
| "6": 135, |
| "[S@@]": 136, |
| "[nH+]": 137, |
| "[Si+]": 138, |
| "[PH]": 139, |
| "[Hg+]": 140, |
| "[C@H]": 141, |
| "[Ga]": 142, |
| "[S@@+]": 143, |
| "[NH3+]": 144, |
| "[SiH]": 145, |
| "[11c]": 146, |
| "%20": 147, |
| "%17": 148, |
| "(": 149, |
| "O": 150, |
| "[IH2]": 151, |
| "[As+]": 152, |
| "F": 153, |
| "[CH2-]": 154, |
| "[Se]": 155, |
| "[c+]": 156, |
| "%23": 157, |
| "[SH]": 158, |
| "I": 159, |
| "7": 160, |
| "%22": 161, |
| "[Os]": 162, |
| "[OH]": 163, |
| "p": 164, |
| "[P@+]": 165, |
| "[Ag+]": 166, |
| "[Ag-4]": 167, |
| "[Ag]": 168, |
| "[Al-3]": 169, |
| "[AsH3]": 170, |
| "[AsH]": 171, |
| "[At]": 172, |
| "[B@-]": 173, |
| "[B@@-]": 174, |
| "[B]": 175, |
| "[Ba]": 176, |
| "[Br+2]": 177, |
| "[BrH]": 178, |
| "[Br]": 179, |
| "[CH3]": 180, |
| "[CaH2]": 181, |
| "[Cs]": 182, |
| "[FH]": 183, |
| "[F]": 184, |
| "[H]": 185, |
| "[He]": 186, |
| "[I+2]": 187, |
| "[I+3]": 188, |
| "[I]": 189, |
| "[K]": 190, |
| "[Kr]": 191, |
| "[Li+]": 192, |
| "[LiH]": 193, |
| "[MgH2]": 194, |
| "[Mg]": 195, |
| "[NH3]": 196, |
| "[N]": 197, |
| "[Na]": 198, |
| "[OH2]": 199, |
| "[P@@+]": 200, |
| "[PH2]": 201, |
| "[P]": 202, |
| "[Rb]": 203, |
| "[SH+]": 204, |
| "[SH2]": 205, |
| "[S]": 206, |
| "[Se-2]": 207, |
| "[SeH2]": 208, |
| "[Si@]": 209, |
| "[SrH2]": 210, |
| "[TeH]": 211, |
| "[Xe]": 212, |
| "[Zn+2]": 213, |
| "[Zn-2]": 214, |
| "[Zn]": 215, |
| "[n]": 216, |
| "[te+]": 217, |
| "=O": 218, |
| "CC": 219, |
| "NC": 220, |
| "CO": 221, |
| "cc": 222, |
| "CCC": 223, |
| "CCCC": 224, |
| "ccc": 225, |
| "CCN": 226, |
| "CCCN": 227, |
| "CN": 228, |
| "CNC": 229, |
| "cccc": 230, |
| "ccccc": 231, |
| "N)": 232, |
| "(N)": 233, |
| "=O)": 234, |
| "(=O)": 235, |
| "C(=O)": 236, |
| "C(=O)N": 237, |
| "O)": 238, |
| "(C": 239, |
| "(C)": 240, |
| "C(C)": 241, |
| "C(C)C": 242, |
| "CC(=O)": 243, |
| "C(=O)O": 244, |
| "C(=O)C": 245, |
| "C(N)": 246, |
| "CC(N)": 247, |
| "C(N)=O": 248, |
| "CO)": 249, |
| "(CO)": 250, |
| "CC(C)": 251, |
| "CS": 252, |
| "=N": 253, |
| "CCNC": 254, |
| "NC(=O)": 255, |
| "=N)": 256, |
| "(=N)": 257, |
| "C(=N)": 258, |
| "CC=O": 259, |
| "CCCN)": 260, |
| "(CCCN)": 261, |
| "NC(=N)": 262, |
| "Br)": 263, |
| "(Br)": 264, |
| "F)": 265, |
| "(F)": 266, |
| "S)": 267, |
| "(S)": 268, |
| "C)": 269, |
| "(O)": 270, |
| "CCS": 271, |
| "CCCS": 272, |
| "CCSC": 273, |
| "cn": 274, |
| "ccn": 275, |
| "cccn": 276, |
| "CSC": 277, |
| "=C": 278, |
| "CCO": 279, |
| "(O": 280, |
| "(=N": 281, |
| "C(=N": 282, |
| "c(O)": 283, |
| "OC": 284, |
| "SCC": 285, |
| "ccc(F)": 286, |
| "S(=O)": 287, |
| "O=C": 288, |
| "CCc": 289, |
| "OC(=O)": 290, |
| "C#": 291, |
| "Cc": 292, |
| "C=C": 293, |
| "C=": 294, |
| "#N": 295, |
| "C#N": 296, |
| "ccs": 297, |
| "NO": 298, |
| "C(O)": 299, |
| "csc": 300, |
| "ccc(C)": 301, |
| "cc(Br)": 302, |
| "ncn": 303, |
| "CCNO": 304, |
| "CCCO": 305, |
| "CSS": 306, |
| "CSSC": 307, |
| "=CC": 308, |
| "I)": 309, |
| "(I)": 310, |
| "CNO": 311, |
| "N(C)": 312, |
| "N(C)C": 313, |
| "C(N)N": 314, |
| "NOC": 315, |
| "C(C)=O": 316, |
| "#C": 317, |
| "cco": 318, |
| "NS": 319, |
| "SN": 320, |
| "c(=O)n": 321, |
| "=S)": 322, |
| "(=S)": 323, |
| "c(N)c": 324, |
| "N=C": 325, |
| "SC": 326, |
| "SSC": 327, |
| "CCC(C)": 328, |
| "c(=O)": 329, |
| "C#N)": 330, |
| "(C#N)": 331, |
| "SS": 332, |
| "=S": 333, |
| "oc": 334, |
| "co": 335, |
| "no": 336, |
| "N#": 337, |
| "N#C": 338, |
| "nc": 339, |
| "sc": 340, |
| "C(=N)N": 341, |
| "C=O": 342, |
| "c(F)": 343, |
| "C(F)": 344, |
| "c(I)": 345, |
| "C(I)": 346, |
| "cnn": 347, |
| "cc(N)": 348, |
| "NC(N)": 349, |
| "OC)": 350, |
| "(OC)": 351, |
| "c(OC)": 352, |
| "c(Br)": 353, |
| "c(N)": 354, |
| "cc(O)": 355, |
| "CS)": 356, |
| "(CS)": 357, |
| "Oc": 358, |
| "cnc": 359, |
| "Cl)": 360, |
| "(Cl)": 361, |
| "c(Cl)": 362, |
| "c(O)c": 363, |
| "NCC": 364, |
| "COC": 365, |
| "OCC": 366, |
| "Nc": 367, |
| "ncc": 368, |
| "cc(C)": 369, |
| "nn": 370, |
| "cs": 371, |
| "c(C)c": 372, |
| "COc": 373, |
| "C(=O)c": 374, |
| "c(C)": 375, |
| "(CC)": 376, |
| "NCc": 377, |
| "nnc": 378, |
| "C(O)C": 379, |
| "=C(N)": 380, |
| "C=CC": 381, |
| "=C(N)N": 382, |
| "N=C(N)": 383, |
| "OCc": 384, |
| "CC=C": 385, |
| "CCl": 386, |
| "CCn": 387, |
| "CNc": 388, |
| "CC(O)": 389, |
| "NN": 390, |
| "CSc": 391, |
| "NC(C)": 392, |
| "CS(=O)": 393, |
| "C(CC)": 394, |
| "C#C": 395, |
| "C(C)N": 396, |
| "CCO)": 397, |
| "(CCO)": 398, |
| "CN=C": 399, |
| "n(C)": 400, |
| "n(C)c": 401, |
| "c(CO)": 402, |
| "#Cc": 403, |
| "=NC": 404 |
| } |
| } |
| } |