haydn-jones
committed on
Commit
•
d8e9dcb
1
Parent(s):
fc3732e
Update tokenizer.json
Browse files - tokenizer.json +53 -64
tokenizer.json
CHANGED
@@ -5,14 +5,14 @@
|
|
5 |
"strategy": "BatchLongest",
|
6 |
"direction": "Right",
|
7 |
"pad_to_multiple_of": null,
|
8 |
-
"pad_id":
|
9 |
"pad_type_id": 0,
|
10 |
"pad_token": "<PAD>"
|
11 |
},
|
12 |
"added_tokens": [
|
13 |
{
|
14 |
"id": 0,
|
15 |
-
"content": "<
|
16 |
"single_word": false,
|
17 |
"lstrip": false,
|
18 |
"rstrip": false,
|
@@ -21,7 +21,7 @@
|
|
21 |
},
|
22 |
{
|
23 |
"id": 1,
|
24 |
-
"content": "<
|
25 |
"single_word": false,
|
26 |
"lstrip": false,
|
27 |
"rstrip": false,
|
@@ -30,7 +30,7 @@
|
|
30 |
},
|
31 |
{
|
32 |
"id": 2,
|
33 |
-
"content": "<
|
34 |
"single_word": false,
|
35 |
"lstrip": false,
|
36 |
"rstrip": false,
|
@@ -39,7 +39,7 @@
|
|
39 |
},
|
40 |
{
|
41 |
"id": 3,
|
42 |
-
"content": "<
|
43 |
"single_word": false,
|
44 |
"lstrip": false,
|
45 |
"rstrip": false,
|
@@ -96,7 +96,7 @@
|
|
96 |
"<CLS>": {
|
97 |
"id": "<CLS>",
|
98 |
"ids": [
|
99 |
-
|
100 |
],
|
101 |
"tokens": [
|
102 |
"<CLS>"
|
@@ -105,7 +105,7 @@
|
|
105 |
"<EOS>": {
|
106 |
"id": "<EOS>",
|
107 |
"ids": [
|
108 |
-
|
109 |
],
|
110 |
"tokens": [
|
111 |
"<EOS>"
|
@@ -113,19 +113,14 @@
|
|
113 |
}
|
114 |
}
|
115 |
},
|
116 |
-
"decoder":
|
117 |
-
"type": "Metaspace",
|
118 |
-
"replacement": "_",
|
119 |
-
"add_prefix_space": false,
|
120 |
-
"prepend_scheme": "always"
|
121 |
-
},
|
122 |
"model": {
|
123 |
"type": "WordLevel",
|
124 |
"vocab": {
|
125 |
-
"<
|
126 |
-
"<
|
127 |
-
"<
|
128 |
-
"<
|
129 |
"[C]": 4,
|
130 |
"[=C]": 5,
|
131 |
"[Ring1]": 6,
|
@@ -170,63 +165,57 @@
|
|
170 |
"[B-1]": 45,
|
171 |
"[PH1]": 46,
|
172 |
"[S-1]": 47,
|
173 |
-
"[=
|
174 |
-
"[=
|
175 |
"[=Se]": 50,
|
176 |
-
"[
|
177 |
-
"[
|
178 |
"[NH1+1]": 53,
|
179 |
"[BH2-1]": 54,
|
180 |
"[NH2+1]": 55,
|
181 |
"[O+1]": 56,
|
182 |
"[SeH1]": 57,
|
183 |
"[SH1]": 58,
|
184 |
-
"[
|
185 |
-
"[
|
186 |
-
"[=
|
187 |
-
"[=
|
188 |
"[=PH1]": 63,
|
189 |
-
"[
|
190 |
-
"[
|
191 |
-
"[=
|
192 |
-
"[
|
193 |
-
"[
|
194 |
-
"[
|
195 |
-
"[
|
196 |
-
"[
|
197 |
"[BH1-1]": 72,
|
198 |
-
"[
|
199 |
-
"[
|
200 |
"[=C-1]": 75,
|
201 |
-
"[
|
202 |
-
"[
|
203 |
-
"[=
|
204 |
-
"[
|
205 |
-
"[
|
206 |
-
"[
|
207 |
-
"[
|
208 |
-
"[
|
209 |
-
"[
|
210 |
-
"[
|
211 |
-
"[
|
212 |
-
"[
|
213 |
-
"[
|
214 |
-
"[
|
215 |
-
"[Cl
|
216 |
-
"[
|
217 |
-
"[
|
218 |
-
"[
|
219 |
-
"[
|
220 |
-
"[
|
221 |
-
"[
|
222 |
-
"[
|
223 |
-
"[
|
224 |
-
"[SH1+1]": 99,
|
225 |
-
"[SH1-1]": 100,
|
226 |
-
"[Se-1]": 101,
|
227 |
-
"[SeH2]": 102,
|
228 |
-
"[Si-1]": 103,
|
229 |
-
"[SiH1-1]": 104
|
230 |
},
|
231 |
"unk_token": "<UNK>"
|
232 |
}
|
|
|
5 |
"strategy": "BatchLongest",
|
6 |
"direction": "Right",
|
7 |
"pad_to_multiple_of": null,
|
8 |
+
"pad_id": 2,
|
9 |
"pad_type_id": 0,
|
10 |
"pad_token": "<PAD>"
|
11 |
},
|
12 |
"added_tokens": [
|
13 |
{
|
14 |
"id": 0,
|
15 |
+
"content": "<CLS>",
|
16 |
"single_word": false,
|
17 |
"lstrip": false,
|
18 |
"rstrip": false,
|
|
|
21 |
},
|
22 |
{
|
23 |
"id": 1,
|
24 |
+
"content": "<EOS>",
|
25 |
"single_word": false,
|
26 |
"lstrip": false,
|
27 |
"rstrip": false,
|
|
|
30 |
},
|
31 |
{
|
32 |
"id": 2,
|
33 |
+
"content": "<PAD>",
|
34 |
"single_word": false,
|
35 |
"lstrip": false,
|
36 |
"rstrip": false,
|
|
|
39 |
},
|
40 |
{
|
41 |
"id": 3,
|
42 |
+
"content": "<UNK>",
|
43 |
"single_word": false,
|
44 |
"lstrip": false,
|
45 |
"rstrip": false,
|
|
|
96 |
"<CLS>": {
|
97 |
"id": "<CLS>",
|
98 |
"ids": [
|
99 |
+
0
|
100 |
],
|
101 |
"tokens": [
|
102 |
"<CLS>"
|
|
|
105 |
"<EOS>": {
|
106 |
"id": "<EOS>",
|
107 |
"ids": [
|
108 |
+
1
|
109 |
],
|
110 |
"tokens": [
|
111 |
"<EOS>"
|
|
|
113 |
}
|
114 |
}
|
115 |
},
|
116 |
+
"decoder": null,
|
|
|
|
|
|
|
|
|
|
|
117 |
"model": {
|
118 |
"type": "WordLevel",
|
119 |
"vocab": {
|
120 |
+
"<CLS>": 0,
|
121 |
+
"<EOS>": 1,
|
122 |
+
"<PAD>": 2,
|
123 |
+
"<UNK>": 3,
|
124 |
"[C]": 4,
|
125 |
"[=C]": 5,
|
126 |
"[Ring1]": 6,
|
|
|
165 |
"[B-1]": 45,
|
166 |
"[PH1]": 46,
|
167 |
"[S-1]": 47,
|
168 |
+
"[=O+1]": 48,
|
169 |
+
"[=S+1]": 49,
|
170 |
"[=Se]": 50,
|
171 |
+
"[NH3+1]": 51,
|
172 |
+
"[C+1]": 52,
|
173 |
"[NH1+1]": 53,
|
174 |
"[BH2-1]": 54,
|
175 |
"[NH2+1]": 55,
|
176 |
"[O+1]": 56,
|
177 |
"[SeH1]": 57,
|
178 |
"[SH1]": 58,
|
179 |
+
"[SiH2]": 59,
|
180 |
+
"[=SH1]": 60,
|
181 |
+
"[=Se+1]": 61,
|
182 |
+
"[=OH1+1]": 62,
|
183 |
"[=PH1]": 63,
|
184 |
+
"[#C-1]": 64,
|
185 |
+
"[=NH1+1]": 65,
|
186 |
+
"[=NH2+1]": 66,
|
187 |
+
"[BH3-1]": 67,
|
188 |
+
"[CH1-1]": 68,
|
189 |
+
"[I+1]": 69,
|
190 |
+
"[CH1+1]": 70,
|
191 |
+
"[NH1-1]": 71,
|
192 |
"[BH1-1]": 72,
|
193 |
+
"[SiH1]": 73,
|
194 |
+
"[Se+1]": 74,
|
195 |
"[=C-1]": 75,
|
196 |
+
"[F+1]": 76,
|
197 |
+
"[=B]": 77,
|
198 |
+
"[=Si]": 78,
|
199 |
+
"[BH0]": 79,
|
200 |
+
"[CH1]": 80,
|
201 |
+
"[CH2+1]": 81,
|
202 |
+
"[Cl+1]": 82,
|
203 |
+
"[NH0]": 83,
|
204 |
+
"[#O+1]": 84,
|
205 |
+
"[Br+2]": 85,
|
206 |
+
"[Br-1]": 86,
|
207 |
+
"[CH2]": 87,
|
208 |
+
"[Cl+2]": 88,
|
209 |
+
"[Cl+3]": 89,
|
210 |
+
"[Cl-1]": 90,
|
211 |
+
"[F-1]": 91,
|
212 |
+
"[I+2]": 92,
|
213 |
+
"[I+3]": 93,
|
214 |
+
"[PH2+1]": 94,
|
215 |
+
"[Se-1]": 95,
|
216 |
+
"[SeH2]": 96,
|
217 |
+
"[Si-1]": 97,
|
218 |
+
"[SiH1-1]": 98
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
},
|
220 |
"unk_token": "<UNK>"
|
221 |
}
|