{ "version": "1.0", "truncation": null, "padding": null, "added_tokens": [ { "id": 0, "special": true, "content": "[PAD]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 1, "special": true, "content": "[UNK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 2, "special": true, "content": "[CLS]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 3, "special": true, "content": "[SEP]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false }, { "id": 4, "special": true, "content": "[MASK]", "single_word": false, "lstrip": false, "rstrip": false, "normalized": false } ], "normalizer": { "type": "BertNormalizer", "clean_text": true, "handle_chinese_chars": true, "strip_accents": true, "lowercase": false }, "pre_tokenizer": { "type": "Sequence", "pretokenizers": [ { "type": "WhitespaceSplit" }, { "type": "Split", "pattern": { "Regex": "(\\[[^\\]]+]|Br?|Cl?|N|O|S|P|F|I|b|c|n|o|s|p|\\(|\\)|\\.|=|#|-|\\+|\\\\|\\/|:|~|@|\\?|>>?|\\*|\\$|\\%[0-9]{2}|[0-9])" }, "behavior": "Isolated", "invert": false } ] }, "post_processor": { "type": "TemplateProcessing", "single": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } } ], "pair": [ { "SpecialToken": { "id": "[CLS]", "type_id": 0 } }, { "Sequence": { "id": "A", "type_id": 0 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 0 } }, { "Sequence": { "id": "B", "type_id": 1 } }, { "SpecialToken": { "id": "[SEP]", "type_id": 1 } } ], "special_tokens": { "[CLS]": { "id": "[CLS]", "ids": [ 2 ], "tokens": [ "[CLS]" ] }, "[SEP]": { "id": "[SEP]", "ids": [ 3 ], "tokens": [ "[SEP]" ] } } }, "decoder": { "type": "WordPiece", "prefix": "##", "cleanup": true }, "model": { "type": "WordPiece", "unk_token": "[UNK]", "continuing_subword_prefix": "##", "max_input_chars_per_word": 100, "vocab": { "[PAD]": 0, "[UNK]": 1, "[CLS]": 2, "[SEP]": 3, "[MASK]": 4, "=": 5, "F": 6, "N": 7, "1": 8, "n": 9, "O": 10, "C": 11, "S": 12, "c": 13, "3": 14, "Cl": 15, "o": 16, "[nH]": 17, "s": 18, "-": 19, "2": 20, "4": 21, "[C@H]": 22, ")": 23, "(": 24, "Br": 25, "#": 26, "[C@@H]": 27, "[C@]": 28, "[C@@]": 29, "[O-]": 30, "[N+]": 31, "[C]": 32, "I": 33, "5": 34, "[C-]": 35, "[CH]": 36, "/": 37, "[S@]": 38, "[S@@]": 39, "[SH]": 40, "[Si]": 41, "[n+]": 42, "[C+]": 43, "[S@H]": 44, "[Fe]": 45, "[S@@H]": 46, "B": 47, "[O]": 48, "[S-]": 49, "[P@H]": 50, "[PH]": 51, "6": 52, "[O+]": 53, "[P@@H]": 54, "[NH+]": 55, "[S]": 56, "\\": 57, "[P@]": 58, "[P@@]": 59, "[N]": 60, "P": 61, "[S+]": 62, "[P]": 63, "[IH]": 64, "[se]": 65, "[C@+]": 66, "[Si@]": 67, "[c-]": 68, "7": 69, "[C@-]": 70, "[Si@@]": 71, "[Se]": 72, "[Si@H]": 73, "[S@+]": 74, "[N-]": 75, "[CnH]": 76, "[c+]": 77, "[P+]": 78, "[Si@@H]": 79, "[SiH]": 80, "[P-]": 81, "[I]": 82, "[S@-]": 83, "[CH+]": 84, "[C@H+]": 85, "[I+]": 86, "[C@@-]": 87, "8": 88, "[Si-]": 89, "[C@@H+]": 90, "[I-]": 91, "[CH-]": 92, "[P@+]": 93, "[Cn]": 94, "[C@@+]": 95, "[SnH]": 96, "[Se@]": 97, "[S@@+]": 98, "[Se@@]": 99, "[Si+]": 100, "9": 101, "[InH]": 102, "[Ce]": 103, "[I@@]": 104, "[P@-]": 105, "[Se-]": 106, "[Sc]": 107, "[SH+]": 108, "[I@@H]": 109, "[I@H]": 110, "[c]": 111, "[SH-]": 112, "[N@]": 113, "[I@]": 114, "[N@@]": 115, "[P@@-]": 116, "%10": 117, "[nH+]": 118, "[Ne]": 119, "[Si@+]": 120, "[Cl+]": 121, "[Br+]": 122, "[N@H+]": 123, "[S@@-]": 124, "[N@+]": 125, "[n-]": 126, "[NH2+]": 127, "[B]": 128, "[Cn+]": 129, "[N@@H+]": 130, "[s+]": 131, "[si]": 132, "[N@@+]": 133, "p": 134, "[P@@+]": 135, "[Sc@H]": 136, "[cH+]": 137, "[ScH]": 138, "[Fe+]": 139, "[o+]": 140, "[cH-]": 141, "[NH]": 142, "[Se+]": 143, "[Fe-]": 144, "[I@+]": 145, "[Sc@]": 146, "[B-]": 147, "[SeH]": 148, "[Se@+]": 149, "[FeH]": 150, "[Fe@@]": 151, "[Cn-]": 152, "[S@@H+]": 153, "%11": 154, "[S@H-]": 155, "[S@@H-]": 156, "[CH2-]": 157, "[CnH-]": 158, "[In]": 159, "[S@H+]": 160, "[pH]": 161, "[PH+]": 162, "[Fe@@H]": 163, "[PH-]": 164, "b": 165, "[Si@-]": 166, "[si-]": 167, "[sH+]": 168, "[Fe@H]": 169, "[P@@H-]": 170, "[Sn+]": 171, "[P@@H+]": 172, "[P@H+]": 173, "[P@H-]": 174, "[Se@H]": 175, "[Se@-]": 176, "[Sc@@H]": 177, "[I@-]": 178, "[Sn]": 179, "[IH-]": 180, "[IH+]": 181, "[F+]": 182, "[Fe@]": 183, "[Sn@]": 184, "[CnH+]": 185, "[Si@H-]": 186, "[NH-]": 187, "[Si@H+]": 188, "[Sc-]": 189, "[Se@@H]": 190, "[Sn-]": 191, "[se+]": 192, "[Si@@+]": 193, "[SiH+]": 194, "[PH2-]": 195, "[CeH]": 196, "[SH3]": 197, "[CH2]": 198, "[Sc+]": 199, "[Si@@-]": 200 } } }