zpn's picture
Upload tokenizer
25688bc
raw
history blame contribute delete
No virus
5.99 kB
{
"version": "1.0",
"truncation": null,
"padding": null,
"added_tokens": [
{
"id": 0,
"content": "[UNK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 1,
"content": "[CLS]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 2,
"content": "[SEP]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 3,
"content": "[PAD]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
},
{
"id": 4,
"content": "[MASK]",
"single_word": false,
"lstrip": false,
"rstrip": false,
"normalized": false,
"special": true
}
],
"normalizer": null,
"pre_tokenizer": {
"type": "WhitespaceSplit"
},
"post_processor": {
"type": "TemplateProcessing",
"single": [
{
"SpecialToken": {
"id": "[CLS]",
"type_id": 0
}
},
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"SpecialToken": {
"id": "[SEP]",
"type_id": 0
}
}
],
"pair": [
{
"Sequence": {
"id": "A",
"type_id": 0
}
},
{
"Sequence": {
"id": "B",
"type_id": 1
}
}
],
"special_tokens": {
"[CLS]": {
"id": "[CLS]",
"ids": [
1
],
"tokens": [
"[CLS]"
]
},
"[SEP]": {
"id": "[SEP]",
"ids": [
2
],
"tokens": [
"[SEP]"
]
}
}
},
"decoder": null,
"model": {
"type": "WordLevel",
"vocab": {
"[UNK]": 0,
"[CLS]": 1,
"[SEP]": 2,
"[PAD]": 3,
"[MASK]": 4,
"[=Branch1]": 5,
"[Branch1]": 6,
"[=C]": 7,
"[Ring1]": 8,
"[N]": 9,
"[=O]": 10,
"[O]": 11,
"[Ring2]": 12,
"[=N]": 13,
"[C@H1]": 14,
"[C@@H1]": 15,
"[Branch2]": 16,
"[F]": 17,
"[S]": 18,
"[=Branch2]": 19,
"[#Branch1]": 20,
"[NH1+1]": 21,
"[=Ring1]": 22,
"[Cl]": 23,
"[#Branch2]": 24,
"[NH1]": 25,
"[NH2+1]": 26,
"[#C]": 27,
"[Br]": 28,
"[#N]": 29,
"[/C]": 30,
"[C]": 31,
"[P]": 32,
"[C@@]": 33,
"[C@]": 34,
"[O-1]": 35,
"[NH3+1]": 36,
"[N-1]": 37,
"[\\C]": 38,
"[=NH1+1]": 39,
"[=S]": 40,
"[I]": 41,
"[/N]": 42,
"[\\-Ring1]": 43,
"[/S]": 44,
"[/-Ring1]": 45,
"[/Cl]": 46,
"[\\N]": 47,
"[\\Cl]": 48,
"[=NH2+1]": 49,
"[/NH1+1]": 50,
"[/O]": 51,
"[H]": 52,
"[Si]": 53,
"[\\O]": 54,
"[=Ring2]": 55,
"[\\-Ring2]": 56,
"[N+1]": 57,
"[\\S]": 58,
"[S-1]": 59,
"[/-Ring2]": 60,
"[/C@@H1]": 61,
"[/C@H1]": 62,
"[\\C@@H1]": 63,
"[S@]": 64,
"[=N+1]": 65,
"[S@@]": 66,
"[\\C@H1]": 67,
"[/NH1]": 68,
"[B]": 69,
"[/F]": 70,
"[CH1]": 71,
"[CH0]": 72,
"[\\O-1]": 73,
"[/O-1]": 74,
"[\\F]": 75,
"[/Br]": 76,
"[/C@]": 77,
"[\\NH1]": 78,
"[\\C@]": 79,
"[\\NH1+1]": 80,
"[P@@]": 81,
"[/C@@]": 82,
"[P@]": 83,
"[\\Br]": 84,
"[\\C@@]": 85,
"[/I]": 86,
"[S+1]": 87,
"[N@+1]": 88,
"[N@@+1]": 89,
"[/N+1]": 90,
"[CH2]": 91,
"[Sn]": 92,
"[OH0]": 93,
"[\\I]": 94,
"[/NH2+1]": 95,
"[\\N+1]": 96,
"[=S+1]": 97,
"[\\NH2+1]": 98,
"[/OH0]": 99,
"[=S@]": 100,
"[=S@@]": 101,
"[=P]": 102,
"[P+1]": 103,
"[/H]": 104,
"[/P]": 105,
"[/NH3+1]": 106,
"[\\H]": 107,
"[B-1]": 108,
"[S@@+1]": 109,
"[\\P]": 110,
"[C+1]": 111,
"[S@+1]": 112,
"[=O+1]": 113,
"[/Si]": 114,
"[\\NH3+1]": 115,
"[N@]": 116,
"[NH0]": 117,
"[\\OH0]": 118,
"[C-1]": 119,
"[/S@]": 120,
"[/S@@]": 121,
"[Si@@]": 122,
"[P@@H1]": 123,
"[\\Si]": 124,
"[/Sn]": 125,
"[CH1-1]": 126,
"[Si@]": 127,
"[/N-1]": 128,
"[N@@]": 129,
"[=NH0]": 130,
"[BH3-1]": 131,
"[IH2]": 132,
"[\\B]": 133,
"[/B]": 134,
"[Sn@]": 135,
"[P@+1]": 136,
"[P@@+1]": 137,
"[/S+1]": 138,
"[Sn@@]": 139,
"[=B]": 140,
"[=IH2]": 141,
"[BH1-1]": 142,
"[P@H1]": 143,
"[#N+1]": 144,
"[=P@@]": 145,
"[=P@]": 146,
"[O+1]": 147,
"[SnH2+1]": 148,
"[SnH4+2]": 149,
"[/CH0]": 150,
"[=17O]": 151,
"[=CH0]": 152,
"[SnH1]": 153,
"[\\N-1]": 154,
"[\\S@]": 155,
"[P@@H1+1]": 156,
"[B@-1]": 157,
"[B@@-1]": 158,
"[I+1]": 159,
"[Sn+1]": 160,
"[Sn+2]": 161,
"[Br+1]": 162,
"[\\P@@]": 163,
"[\\P@]": 164,
"[\\Sn]": 165,
"[#S]": 166,
"[/CH1]": 167,
"[/NH0]": 168,
"[17O]": 169,
"[18OH1]": 170,
"[=Si]": 171,
"[BH2-1]": 172,
"[S@@-1]": 173,
"[S@@H1]": 174,
"[Sn+3]": 175,
"[SnH2]": 176,
"[SnH6+3]": 177,
"[\\C-1]": 178,
"[\\NH0]": 179,
"[\\S+1]": 180,
"[#P]": 181,
"[/CH2]": 182,
"[/O+1]": 183,
"[/P@@]": 184,
"[/P@]": 185,
"[/Si@@]": 186,
"[/Si@]": 187,
"[S@H1]": 188,
"[SH3]": 189,
"[SiH2]": 190,
"[SiH3]": 191,
"[\\C+1]": 192,
"[\\S@@]": 193
},
"unk_token": "[UNK]"
}
}