haydn-jones committed on
Commit
d8e9dcb
1 Parent(s): fc3732e

Update tokenizer.json

Browse files
Files changed (1) hide show
  1. tokenizer.json +53 -64
tokenizer.json CHANGED
@@ -5,14 +5,14 @@
5
  "strategy": "BatchLongest",
6
  "direction": "Right",
7
  "pad_to_multiple_of": null,
8
- "pad_id": 3,
9
  "pad_type_id": 0,
10
  "pad_token": "<PAD>"
11
  },
12
  "added_tokens": [
13
  {
14
  "id": 0,
15
- "content": "<UNK>",
16
  "single_word": false,
17
  "lstrip": false,
18
  "rstrip": false,
@@ -21,7 +21,7 @@
21
  },
22
  {
23
  "id": 1,
24
- "content": "<CLS>",
25
  "single_word": false,
26
  "lstrip": false,
27
  "rstrip": false,
@@ -30,7 +30,7 @@
30
  },
31
  {
32
  "id": 2,
33
- "content": "<EOS>",
34
  "single_word": false,
35
  "lstrip": false,
36
  "rstrip": false,
@@ -39,7 +39,7 @@
39
  },
40
  {
41
  "id": 3,
42
- "content": "<PAD>",
43
  "single_word": false,
44
  "lstrip": false,
45
  "rstrip": false,
@@ -96,7 +96,7 @@
96
  "<CLS>": {
97
  "id": "<CLS>",
98
  "ids": [
99
- 1
100
  ],
101
  "tokens": [
102
  "<CLS>"
@@ -105,7 +105,7 @@
105
  "<EOS>": {
106
  "id": "<EOS>",
107
  "ids": [
108
- 2
109
  ],
110
  "tokens": [
111
  "<EOS>"
@@ -113,19 +113,14 @@
113
  }
114
  }
115
  },
116
- "decoder": {
117
- "type": "Metaspace",
118
- "replacement": "_",
119
- "add_prefix_space": false,
120
- "prepend_scheme": "always"
121
- },
122
  "model": {
123
  "type": "WordLevel",
124
  "vocab": {
125
- "<UNK>": 0,
126
- "<CLS>": 1,
127
- "<EOS>": 2,
128
- "<PAD>": 3,
129
  "[C]": 4,
130
  "[=C]": 5,
131
  "[Ring1]": 6,
@@ -170,63 +165,57 @@
170
  "[B-1]": 45,
171
  "[PH1]": 46,
172
  "[S-1]": 47,
173
- "[=S+1]": 48,
174
- "[=O+1]": 49,
175
  "[=Se]": 50,
176
- "[C+1]": 51,
177
- "[NH3+1]": 52,
178
  "[NH1+1]": 53,
179
  "[BH2-1]": 54,
180
  "[NH2+1]": 55,
181
  "[O+1]": 56,
182
  "[SeH1]": 57,
183
  "[SH1]": 58,
184
- "[=Se+1]": 59,
185
- "[SiH2]": 60,
186
- "[=OH1+1]": 61,
187
- "[=SH1]": 62,
188
  "[=PH1]": 63,
189
- "[I+1]": 64,
190
- "[#C-1]": 65,
191
- "[=NH1+1]": 66,
192
- "[CH1-1]": 67,
193
- "[=NH2+1]": 68,
194
- "[BH3-1]": 69,
195
- "[NH1-1]": 70,
196
- "[CH1+1]": 71,
197
  "[BH1-1]": 72,
198
- "[Se+1]": 73,
199
- "[SiH1]": 74,
200
  "[=C-1]": 75,
201
- "[=Si]": 76,
202
- "[F+1]": 77,
203
- "[=B-1]": 78,
204
- "[=B]": 79,
205
- "[BH0]": 80,
206
- "[CH1]": 81,
207
- "[CH2+1]": 82,
208
- "[Cl+1]": 83,
209
- "[NH0]": 84,
210
- "[#O+1]": 85,
211
- "[#S]": 86,
212
- "[Br+2]": 87,
213
- "[Br-1]": 88,
214
- "[CH2]": 89,
215
- "[Cl+2]": 90,
216
- "[Cl+3]": 91,
217
- "[Cl-1]": 92,
218
- "[F-1]": 93,
219
- "[I+2]": 94,
220
- "[I+3]": 95,
221
- "[I-1]": 96,
222
- "[OH1+1]": 97,
223
- "[PH2+1]": 98,
224
- "[SH1+1]": 99,
225
- "[SH1-1]": 100,
226
- "[Se-1]": 101,
227
- "[SeH2]": 102,
228
- "[Si-1]": 103,
229
- "[SiH1-1]": 104
230
  },
231
  "unk_token": "<UNK>"
232
  }
 
5
  "strategy": "BatchLongest",
6
  "direction": "Right",
7
  "pad_to_multiple_of": null,
8
+ "pad_id": 2,
9
  "pad_type_id": 0,
10
  "pad_token": "<PAD>"
11
  },
12
  "added_tokens": [
13
  {
14
  "id": 0,
15
+ "content": "<CLS>",
16
  "single_word": false,
17
  "lstrip": false,
18
  "rstrip": false,
 
21
  },
22
  {
23
  "id": 1,
24
+ "content": "<EOS>",
25
  "single_word": false,
26
  "lstrip": false,
27
  "rstrip": false,
 
30
  },
31
  {
32
  "id": 2,
33
+ "content": "<PAD>",
34
  "single_word": false,
35
  "lstrip": false,
36
  "rstrip": false,
 
39
  },
40
  {
41
  "id": 3,
42
+ "content": "<UNK>",
43
  "single_word": false,
44
  "lstrip": false,
45
  "rstrip": false,
 
96
  "<CLS>": {
97
  "id": "<CLS>",
98
  "ids": [
99
+ 0
100
  ],
101
  "tokens": [
102
  "<CLS>"
 
105
  "<EOS>": {
106
  "id": "<EOS>",
107
  "ids": [
108
+ 1
109
  ],
110
  "tokens": [
111
  "<EOS>"
 
113
  }
114
  }
115
  },
116
+ "decoder": null,
 
 
 
 
 
117
  "model": {
118
  "type": "WordLevel",
119
  "vocab": {
120
+ "<CLS>": 0,
121
+ "<EOS>": 1,
122
+ "<PAD>": 2,
123
+ "<UNK>": 3,
124
  "[C]": 4,
125
  "[=C]": 5,
126
  "[Ring1]": 6,
 
165
  "[B-1]": 45,
166
  "[PH1]": 46,
167
  "[S-1]": 47,
168
+ "[=O+1]": 48,
169
+ "[=S+1]": 49,
170
  "[=Se]": 50,
171
+ "[NH3+1]": 51,
172
+ "[C+1]": 52,
173
  "[NH1+1]": 53,
174
  "[BH2-1]": 54,
175
  "[NH2+1]": 55,
176
  "[O+1]": 56,
177
  "[SeH1]": 57,
178
  "[SH1]": 58,
179
+ "[SiH2]": 59,
180
+ "[=SH1]": 60,
181
+ "[=Se+1]": 61,
182
+ "[=OH1+1]": 62,
183
  "[=PH1]": 63,
184
+ "[#C-1]": 64,
185
+ "[=NH1+1]": 65,
186
+ "[=NH2+1]": 66,
187
+ "[BH3-1]": 67,
188
+ "[CH1-1]": 68,
189
+ "[I+1]": 69,
190
+ "[CH1+1]": 70,
191
+ "[NH1-1]": 71,
192
  "[BH1-1]": 72,
193
+ "[SiH1]": 73,
194
+ "[Se+1]": 74,
195
  "[=C-1]": 75,
196
+ "[F+1]": 76,
197
+ "[=B]": 77,
198
+ "[=Si]": 78,
199
+ "[BH0]": 79,
200
+ "[CH1]": 80,
201
+ "[CH2+1]": 81,
202
+ "[Cl+1]": 82,
203
+ "[NH0]": 83,
204
+ "[#O+1]": 84,
205
+ "[Br+2]": 85,
206
+ "[Br-1]": 86,
207
+ "[CH2]": 87,
208
+ "[Cl+2]": 88,
209
+ "[Cl+3]": 89,
210
+ "[Cl-1]": 90,
211
+ "[F-1]": 91,
212
+ "[I+2]": 92,
213
+ "[I+3]": 93,
214
+ "[PH2+1]": 94,
215
+ "[Se-1]": 95,
216
+ "[SeH2]": 96,
217
+ "[Si-1]": 97,
218
+ "[SiH1-1]": 98
 
 
 
 
 
 
219
  },
220
  "unk_token": "<UNK>"
221
  }