impactframes cyan2k committed on
Commit
4f123fd
0 Parent(s):

Duplicate from cyan2k/molmo-7B-D-bnb-4bit

Browse files

Co-authored-by: Andre Ratzenberger <cyan2k@users.noreply.huggingface.co>

.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ license: apache-2.0
4
+ base_model:
5
+ - allenai/Molmo-7B-D-0924
6
+ ---
7
+
8
+ Molmo-7B-D BnB 4bit quant
9
+ 30GB -> 7GB
10
+
11
+ approx. 12GB VRAM required
12
+
13
+ base model for more information:
14
+
15
+ https://huggingface.co/allenai/Molmo-7B-D-0924
16
+
17
+ example code:
18
+
19
+ https://github.com/cyan2k/molmo-7b-bnb-4bit
20
+
21
+ performance metrics & benchmarks to compare with base will follow over the next week
added_tokens.json ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<im_col>": 152067,
3
+ "<im_end>": 152065,
4
+ "<im_patch>": 152066,
5
+ "<im_start>": 152064,
6
+ "<|endoftext|>": 151643,
7
+ "<|im_end|>": 151645,
8
+ "<|im_start|>": 151644,
9
+ "<|image|>": 152068,
10
+ "|<EXTRA_TOKENS_0>|": 151646,
11
+ "|<EXTRA_TOKENS_100>|": 151746,
12
+ "|<EXTRA_TOKENS_101>|": 151747,
13
+ "|<EXTRA_TOKENS_102>|": 151748,
14
+ "|<EXTRA_TOKENS_103>|": 151749,
15
+ "|<EXTRA_TOKENS_104>|": 151750,
16
+ "|<EXTRA_TOKENS_105>|": 151751,
17
+ "|<EXTRA_TOKENS_106>|": 151752,
18
+ "|<EXTRA_TOKENS_107>|": 151753,
19
+ "|<EXTRA_TOKENS_108>|": 151754,
20
+ "|<EXTRA_TOKENS_109>|": 151755,
21
+ "|<EXTRA_TOKENS_10>|": 151656,
22
+ "|<EXTRA_TOKENS_110>|": 151756,
23
+ "|<EXTRA_TOKENS_111>|": 151757,
24
+ "|<EXTRA_TOKENS_112>|": 151758,
25
+ "|<EXTRA_TOKENS_113>|": 151759,
26
+ "|<EXTRA_TOKENS_114>|": 151760,
27
+ "|<EXTRA_TOKENS_115>|": 151761,
28
+ "|<EXTRA_TOKENS_116>|": 151762,
29
+ "|<EXTRA_TOKENS_117>|": 151763,
30
+ "|<EXTRA_TOKENS_118>|": 151764,
31
+ "|<EXTRA_TOKENS_119>|": 151765,
32
+ "|<EXTRA_TOKENS_11>|": 151657,
33
+ "|<EXTRA_TOKENS_120>|": 151766,
34
+ "|<EXTRA_TOKENS_121>|": 151767,
35
+ "|<EXTRA_TOKENS_122>|": 151768,
36
+ "|<EXTRA_TOKENS_123>|": 151769,
37
+ "|<EXTRA_TOKENS_124>|": 151770,
38
+ "|<EXTRA_TOKENS_125>|": 151771,
39
+ "|<EXTRA_TOKENS_126>|": 151772,
40
+ "|<EXTRA_TOKENS_127>|": 151773,
41
+ "|<EXTRA_TOKENS_128>|": 151774,
42
+ "|<EXTRA_TOKENS_129>|": 151775,
43
+ "|<EXTRA_TOKENS_12>|": 151658,
44
+ "|<EXTRA_TOKENS_130>|": 151776,
45
+ "|<EXTRA_TOKENS_131>|": 151777,
46
+ "|<EXTRA_TOKENS_132>|": 151778,
47
+ "|<EXTRA_TOKENS_133>|": 151779,
48
+ "|<EXTRA_TOKENS_134>|": 151780,
49
+ "|<EXTRA_TOKENS_135>|": 151781,
50
+ "|<EXTRA_TOKENS_136>|": 151782,
51
+ "|<EXTRA_TOKENS_137>|": 151783,
52
+ "|<EXTRA_TOKENS_138>|": 151784,
53
+ "|<EXTRA_TOKENS_139>|": 151785,
54
+ "|<EXTRA_TOKENS_13>|": 151659,
55
+ "|<EXTRA_TOKENS_140>|": 151786,
56
+ "|<EXTRA_TOKENS_141>|": 151787,
57
+ "|<EXTRA_TOKENS_142>|": 151788,
58
+ "|<EXTRA_TOKENS_143>|": 151789,
59
+ "|<EXTRA_TOKENS_144>|": 151790,
60
+ "|<EXTRA_TOKENS_145>|": 151791,
61
+ "|<EXTRA_TOKENS_146>|": 151792,
62
+ "|<EXTRA_TOKENS_147>|": 151793,
63
+ "|<EXTRA_TOKENS_148>|": 151794,
64
+ "|<EXTRA_TOKENS_149>|": 151795,
65
+ "|<EXTRA_TOKENS_14>|": 151660,
66
+ "|<EXTRA_TOKENS_150>|": 151796,
67
+ "|<EXTRA_TOKENS_151>|": 151797,
68
+ "|<EXTRA_TOKENS_152>|": 151798,
69
+ "|<EXTRA_TOKENS_153>|": 151799,
70
+ "|<EXTRA_TOKENS_154>|": 151800,
71
+ "|<EXTRA_TOKENS_155>|": 151801,
72
+ "|<EXTRA_TOKENS_156>|": 151802,
73
+ "|<EXTRA_TOKENS_157>|": 151803,
74
+ "|<EXTRA_TOKENS_158>|": 151804,
75
+ "|<EXTRA_TOKENS_159>|": 151805,
76
+ "|<EXTRA_TOKENS_15>|": 151661,
77
+ "|<EXTRA_TOKENS_160>|": 151806,
78
+ "|<EXTRA_TOKENS_161>|": 151807,
79
+ "|<EXTRA_TOKENS_162>|": 151808,
80
+ "|<EXTRA_TOKENS_163>|": 151809,
81
+ "|<EXTRA_TOKENS_164>|": 151810,
82
+ "|<EXTRA_TOKENS_165>|": 151811,
83
+ "|<EXTRA_TOKENS_166>|": 151812,
84
+ "|<EXTRA_TOKENS_167>|": 151813,
85
+ "|<EXTRA_TOKENS_168>|": 151814,
86
+ "|<EXTRA_TOKENS_169>|": 151815,
87
+ "|<EXTRA_TOKENS_16>|": 151662,
88
+ "|<EXTRA_TOKENS_170>|": 151816,
89
+ "|<EXTRA_TOKENS_171>|": 151817,
90
+ "|<EXTRA_TOKENS_172>|": 151818,
91
+ "|<EXTRA_TOKENS_173>|": 151819,
92
+ "|<EXTRA_TOKENS_174>|": 151820,
93
+ "|<EXTRA_TOKENS_175>|": 151821,
94
+ "|<EXTRA_TOKENS_176>|": 151822,
95
+ "|<EXTRA_TOKENS_177>|": 151823,
96
+ "|<EXTRA_TOKENS_178>|": 151824,
97
+ "|<EXTRA_TOKENS_179>|": 151825,
98
+ "|<EXTRA_TOKENS_17>|": 151663,
99
+ "|<EXTRA_TOKENS_180>|": 151826,
100
+ "|<EXTRA_TOKENS_181>|": 151827,
101
+ "|<EXTRA_TOKENS_182>|": 151828,
102
+ "|<EXTRA_TOKENS_183>|": 151829,
103
+ "|<EXTRA_TOKENS_184>|": 151830,
104
+ "|<EXTRA_TOKENS_185>|": 151831,
105
+ "|<EXTRA_TOKENS_186>|": 151832,
106
+ "|<EXTRA_TOKENS_187>|": 151833,
107
+ "|<EXTRA_TOKENS_188>|": 151834,
108
+ "|<EXTRA_TOKENS_189>|": 151835,
109
+ "|<EXTRA_TOKENS_18>|": 151664,
110
+ "|<EXTRA_TOKENS_190>|": 151836,
111
+ "|<EXTRA_TOKENS_191>|": 151837,
112
+ "|<EXTRA_TOKENS_192>|": 151838,
113
+ "|<EXTRA_TOKENS_193>|": 151839,
114
+ "|<EXTRA_TOKENS_194>|": 151840,
115
+ "|<EXTRA_TOKENS_195>|": 151841,
116
+ "|<EXTRA_TOKENS_196>|": 151842,
117
+ "|<EXTRA_TOKENS_197>|": 151843,
118
+ "|<EXTRA_TOKENS_198>|": 151844,
119
+ "|<EXTRA_TOKENS_199>|": 151845,
120
+ "|<EXTRA_TOKENS_19>|": 151665,
121
+ "|<EXTRA_TOKENS_1>|": 151647,
122
+ "|<EXTRA_TOKENS_200>|": 151846,
123
+ "|<EXTRA_TOKENS_201>|": 151847,
124
+ "|<EXTRA_TOKENS_202>|": 151848,
125
+ "|<EXTRA_TOKENS_203>|": 151849,
126
+ "|<EXTRA_TOKENS_204>|": 151850,
127
+ "|<EXTRA_TOKENS_205>|": 151851,
128
+ "|<EXTRA_TOKENS_206>|": 151852,
129
+ "|<EXTRA_TOKENS_207>|": 151853,
130
+ "|<EXTRA_TOKENS_208>|": 151854,
131
+ "|<EXTRA_TOKENS_209>|": 151855,
132
+ "|<EXTRA_TOKENS_20>|": 151666,
133
+ "|<EXTRA_TOKENS_210>|": 151856,
134
+ "|<EXTRA_TOKENS_211>|": 151857,
135
+ "|<EXTRA_TOKENS_212>|": 151858,
136
+ "|<EXTRA_TOKENS_213>|": 151859,
137
+ "|<EXTRA_TOKENS_214>|": 151860,
138
+ "|<EXTRA_TOKENS_215>|": 151861,
139
+ "|<EXTRA_TOKENS_216>|": 151862,
140
+ "|<EXTRA_TOKENS_217>|": 151863,
141
+ "|<EXTRA_TOKENS_218>|": 151864,
142
+ "|<EXTRA_TOKENS_219>|": 151865,
143
+ "|<EXTRA_TOKENS_21>|": 151667,
144
+ "|<EXTRA_TOKENS_220>|": 151866,
145
+ "|<EXTRA_TOKENS_221>|": 151867,
146
+ "|<EXTRA_TOKENS_222>|": 151868,
147
+ "|<EXTRA_TOKENS_223>|": 151869,
148
+ "|<EXTRA_TOKENS_224>|": 151870,
149
+ "|<EXTRA_TOKENS_225>|": 151871,
150
+ "|<EXTRA_TOKENS_226>|": 151872,
151
+ "|<EXTRA_TOKENS_227>|": 151873,
152
+ "|<EXTRA_TOKENS_228>|": 151874,
153
+ "|<EXTRA_TOKENS_229>|": 151875,
154
+ "|<EXTRA_TOKENS_22>|": 151668,
155
+ "|<EXTRA_TOKENS_230>|": 151876,
156
+ "|<EXTRA_TOKENS_231>|": 151877,
157
+ "|<EXTRA_TOKENS_232>|": 151878,
158
+ "|<EXTRA_TOKENS_233>|": 151879,
159
+ "|<EXTRA_TOKENS_234>|": 151880,
160
+ "|<EXTRA_TOKENS_235>|": 151881,
161
+ "|<EXTRA_TOKENS_236>|": 151882,
162
+ "|<EXTRA_TOKENS_237>|": 151883,
163
+ "|<EXTRA_TOKENS_238>|": 151884,
164
+ "|<EXTRA_TOKENS_239>|": 151885,
165
+ "|<EXTRA_TOKENS_23>|": 151669,
166
+ "|<EXTRA_TOKENS_240>|": 151886,
167
+ "|<EXTRA_TOKENS_241>|": 151887,
168
+ "|<EXTRA_TOKENS_242>|": 151888,
169
+ "|<EXTRA_TOKENS_243>|": 151889,
170
+ "|<EXTRA_TOKENS_244>|": 151890,
171
+ "|<EXTRA_TOKENS_245>|": 151891,
172
+ "|<EXTRA_TOKENS_246>|": 151892,
173
+ "|<EXTRA_TOKENS_247>|": 151893,
174
+ "|<EXTRA_TOKENS_248>|": 151894,
175
+ "|<EXTRA_TOKENS_249>|": 151895,
176
+ "|<EXTRA_TOKENS_24>|": 151670,
177
+ "|<EXTRA_TOKENS_250>|": 151896,
178
+ "|<EXTRA_TOKENS_251>|": 151897,
179
+ "|<EXTRA_TOKENS_252>|": 151898,
180
+ "|<EXTRA_TOKENS_253>|": 151899,
181
+ "|<EXTRA_TOKENS_254>|": 151900,
182
+ "|<EXTRA_TOKENS_255>|": 151901,
183
+ "|<EXTRA_TOKENS_256>|": 151902,
184
+ "|<EXTRA_TOKENS_257>|": 151903,
185
+ "|<EXTRA_TOKENS_258>|": 151904,
186
+ "|<EXTRA_TOKENS_259>|": 151905,
187
+ "|<EXTRA_TOKENS_25>|": 151671,
188
+ "|<EXTRA_TOKENS_260>|": 151906,
189
+ "|<EXTRA_TOKENS_261>|": 151907,
190
+ "|<EXTRA_TOKENS_262>|": 151908,
191
+ "|<EXTRA_TOKENS_263>|": 151909,
192
+ "|<EXTRA_TOKENS_264>|": 151910,
193
+ "|<EXTRA_TOKENS_265>|": 151911,
194
+ "|<EXTRA_TOKENS_266>|": 151912,
195
+ "|<EXTRA_TOKENS_267>|": 151913,
196
+ "|<EXTRA_TOKENS_268>|": 151914,
197
+ "|<EXTRA_TOKENS_269>|": 151915,
198
+ "|<EXTRA_TOKENS_26>|": 151672,
199
+ "|<EXTRA_TOKENS_270>|": 151916,
200
+ "|<EXTRA_TOKENS_271>|": 151917,
201
+ "|<EXTRA_TOKENS_272>|": 151918,
202
+ "|<EXTRA_TOKENS_273>|": 151919,
203
+ "|<EXTRA_TOKENS_274>|": 151920,
204
+ "|<EXTRA_TOKENS_275>|": 151921,
205
+ "|<EXTRA_TOKENS_276>|": 151922,
206
+ "|<EXTRA_TOKENS_277>|": 151923,
207
+ "|<EXTRA_TOKENS_278>|": 151924,
208
+ "|<EXTRA_TOKENS_279>|": 151925,
209
+ "|<EXTRA_TOKENS_27>|": 151673,
210
+ "|<EXTRA_TOKENS_280>|": 151926,
211
+ "|<EXTRA_TOKENS_281>|": 151927,
212
+ "|<EXTRA_TOKENS_282>|": 151928,
213
+ "|<EXTRA_TOKENS_283>|": 151929,
214
+ "|<EXTRA_TOKENS_284>|": 151930,
215
+ "|<EXTRA_TOKENS_285>|": 151931,
216
+ "|<EXTRA_TOKENS_286>|": 151932,
217
+ "|<EXTRA_TOKENS_287>|": 151933,
218
+ "|<EXTRA_TOKENS_288>|": 151934,
219
+ "|<EXTRA_TOKENS_289>|": 151935,
220
+ "|<EXTRA_TOKENS_28>|": 151674,
221
+ "|<EXTRA_TOKENS_290>|": 151936,
222
+ "|<EXTRA_TOKENS_291>|": 151937,
223
+ "|<EXTRA_TOKENS_292>|": 151938,
224
+ "|<EXTRA_TOKENS_293>|": 151939,
225
+ "|<EXTRA_TOKENS_294>|": 151940,
226
+ "|<EXTRA_TOKENS_295>|": 151941,
227
+ "|<EXTRA_TOKENS_296>|": 151942,
228
+ "|<EXTRA_TOKENS_297>|": 151943,
229
+ "|<EXTRA_TOKENS_298>|": 151944,
230
+ "|<EXTRA_TOKENS_299>|": 151945,
231
+ "|<EXTRA_TOKENS_29>|": 151675,
232
+ "|<EXTRA_TOKENS_2>|": 151648,
233
+ "|<EXTRA_TOKENS_300>|": 151946,
234
+ "|<EXTRA_TOKENS_301>|": 151947,
235
+ "|<EXTRA_TOKENS_302>|": 151948,
236
+ "|<EXTRA_TOKENS_303>|": 151949,
237
+ "|<EXTRA_TOKENS_304>|": 151950,
238
+ "|<EXTRA_TOKENS_305>|": 151951,
239
+ "|<EXTRA_TOKENS_306>|": 151952,
240
+ "|<EXTRA_TOKENS_307>|": 151953,
241
+ "|<EXTRA_TOKENS_308>|": 151954,
242
+ "|<EXTRA_TOKENS_309>|": 151955,
243
+ "|<EXTRA_TOKENS_30>|": 151676,
244
+ "|<EXTRA_TOKENS_310>|": 151956,
245
+ "|<EXTRA_TOKENS_311>|": 151957,
246
+ "|<EXTRA_TOKENS_312>|": 151958,
247
+ "|<EXTRA_TOKENS_313>|": 151959,
248
+ "|<EXTRA_TOKENS_314>|": 151960,
249
+ "|<EXTRA_TOKENS_315>|": 151961,
250
+ "|<EXTRA_TOKENS_316>|": 151962,
251
+ "|<EXTRA_TOKENS_317>|": 151963,
252
+ "|<EXTRA_TOKENS_318>|": 151964,
253
+ "|<EXTRA_TOKENS_319>|": 151965,
254
+ "|<EXTRA_TOKENS_31>|": 151677,
255
+ "|<EXTRA_TOKENS_320>|": 151966,
256
+ "|<EXTRA_TOKENS_321>|": 151967,
257
+ "|<EXTRA_TOKENS_322>|": 151968,
258
+ "|<EXTRA_TOKENS_323>|": 151969,
259
+ "|<EXTRA_TOKENS_324>|": 151970,
260
+ "|<EXTRA_TOKENS_325>|": 151971,
261
+ "|<EXTRA_TOKENS_326>|": 151972,
262
+ "|<EXTRA_TOKENS_327>|": 151973,
263
+ "|<EXTRA_TOKENS_328>|": 151974,
264
+ "|<EXTRA_TOKENS_329>|": 151975,
265
+ "|<EXTRA_TOKENS_32>|": 151678,
266
+ "|<EXTRA_TOKENS_330>|": 151976,
267
+ "|<EXTRA_TOKENS_331>|": 151977,
268
+ "|<EXTRA_TOKENS_332>|": 151978,
269
+ "|<EXTRA_TOKENS_333>|": 151979,
270
+ "|<EXTRA_TOKENS_334>|": 151980,
271
+ "|<EXTRA_TOKENS_335>|": 151981,
272
+ "|<EXTRA_TOKENS_336>|": 151982,
273
+ "|<EXTRA_TOKENS_337>|": 151983,
274
+ "|<EXTRA_TOKENS_338>|": 151984,
275
+ "|<EXTRA_TOKENS_339>|": 151985,
276
+ "|<EXTRA_TOKENS_33>|": 151679,
277
+ "|<EXTRA_TOKENS_340>|": 151986,
278
+ "|<EXTRA_TOKENS_341>|": 151987,
279
+ "|<EXTRA_TOKENS_342>|": 151988,
280
+ "|<EXTRA_TOKENS_343>|": 151989,
281
+ "|<EXTRA_TOKENS_344>|": 151990,
282
+ "|<EXTRA_TOKENS_345>|": 151991,
283
+ "|<EXTRA_TOKENS_346>|": 151992,
284
+ "|<EXTRA_TOKENS_347>|": 151993,
285
+ "|<EXTRA_TOKENS_348>|": 151994,
286
+ "|<EXTRA_TOKENS_349>|": 151995,
287
+ "|<EXTRA_TOKENS_34>|": 151680,
288
+ "|<EXTRA_TOKENS_350>|": 151996,
289
+ "|<EXTRA_TOKENS_351>|": 151997,
290
+ "|<EXTRA_TOKENS_352>|": 151998,
291
+ "|<EXTRA_TOKENS_353>|": 151999,
292
+ "|<EXTRA_TOKENS_354>|": 152000,
293
+ "|<EXTRA_TOKENS_355>|": 152001,
294
+ "|<EXTRA_TOKENS_356>|": 152002,
295
+ "|<EXTRA_TOKENS_357>|": 152003,
296
+ "|<EXTRA_TOKENS_358>|": 152004,
297
+ "|<EXTRA_TOKENS_359>|": 152005,
298
+ "|<EXTRA_TOKENS_35>|": 151681,
299
+ "|<EXTRA_TOKENS_360>|": 152006,
300
+ "|<EXTRA_TOKENS_361>|": 152007,
301
+ "|<EXTRA_TOKENS_362>|": 152008,
302
+ "|<EXTRA_TOKENS_363>|": 152009,
303
+ "|<EXTRA_TOKENS_364>|": 152010,
304
+ "|<EXTRA_TOKENS_365>|": 152011,
305
+ "|<EXTRA_TOKENS_366>|": 152012,
306
+ "|<EXTRA_TOKENS_367>|": 152013,
307
+ "|<EXTRA_TOKENS_368>|": 152014,
308
+ "|<EXTRA_TOKENS_369>|": 152015,
309
+ "|<EXTRA_TOKENS_36>|": 151682,
310
+ "|<EXTRA_TOKENS_370>|": 152016,
311
+ "|<EXTRA_TOKENS_371>|": 152017,
312
+ "|<EXTRA_TOKENS_372>|": 152018,
313
+ "|<EXTRA_TOKENS_373>|": 152019,
314
+ "|<EXTRA_TOKENS_374>|": 152020,
315
+ "|<EXTRA_TOKENS_375>|": 152021,
316
+ "|<EXTRA_TOKENS_376>|": 152022,
317
+ "|<EXTRA_TOKENS_377>|": 152023,
318
+ "|<EXTRA_TOKENS_378>|": 152024,
319
+ "|<EXTRA_TOKENS_379>|": 152025,
320
+ "|<EXTRA_TOKENS_37>|": 151683,
321
+ "|<EXTRA_TOKENS_380>|": 152026,
322
+ "|<EXTRA_TOKENS_381>|": 152027,
323
+ "|<EXTRA_TOKENS_382>|": 152028,
324
+ "|<EXTRA_TOKENS_383>|": 152029,
325
+ "|<EXTRA_TOKENS_384>|": 152030,
326
+ "|<EXTRA_TOKENS_385>|": 152031,
327
+ "|<EXTRA_TOKENS_386>|": 152032,
328
+ "|<EXTRA_TOKENS_387>|": 152033,
329
+ "|<EXTRA_TOKENS_388>|": 152034,
330
+ "|<EXTRA_TOKENS_389>|": 152035,
331
+ "|<EXTRA_TOKENS_38>|": 151684,
332
+ "|<EXTRA_TOKENS_390>|": 152036,
333
+ "|<EXTRA_TOKENS_391>|": 152037,
334
+ "|<EXTRA_TOKENS_392>|": 152038,
335
+ "|<EXTRA_TOKENS_393>|": 152039,
336
+ "|<EXTRA_TOKENS_394>|": 152040,
337
+ "|<EXTRA_TOKENS_395>|": 152041,
338
+ "|<EXTRA_TOKENS_396>|": 152042,
339
+ "|<EXTRA_TOKENS_397>|": 152043,
340
+ "|<EXTRA_TOKENS_398>|": 152044,
341
+ "|<EXTRA_TOKENS_399>|": 152045,
342
+ "|<EXTRA_TOKENS_39>|": 151685,
343
+ "|<EXTRA_TOKENS_3>|": 151649,
344
+ "|<EXTRA_TOKENS_400>|": 152046,
345
+ "|<EXTRA_TOKENS_401>|": 152047,
346
+ "|<EXTRA_TOKENS_402>|": 152048,
347
+ "|<EXTRA_TOKENS_403>|": 152049,
348
+ "|<EXTRA_TOKENS_404>|": 152050,
349
+ "|<EXTRA_TOKENS_405>|": 152051,
350
+ "|<EXTRA_TOKENS_406>|": 152052,
351
+ "|<EXTRA_TOKENS_407>|": 152053,
352
+ "|<EXTRA_TOKENS_408>|": 152054,
353
+ "|<EXTRA_TOKENS_409>|": 152055,
354
+ "|<EXTRA_TOKENS_40>|": 151686,
355
+ "|<EXTRA_TOKENS_410>|": 152056,
356
+ "|<EXTRA_TOKENS_411>|": 152057,
357
+ "|<EXTRA_TOKENS_412>|": 152058,
358
+ "|<EXTRA_TOKENS_413>|": 152059,
359
+ "|<EXTRA_TOKENS_414>|": 152060,
360
+ "|<EXTRA_TOKENS_415>|": 152061,
361
+ "|<EXTRA_TOKENS_416>|": 152062,
362
+ "|<EXTRA_TOKENS_417>|": 152063,
363
+ "|<EXTRA_TOKENS_41>|": 151687,
364
+ "|<EXTRA_TOKENS_42>|": 151688,
365
+ "|<EXTRA_TOKENS_43>|": 151689,
366
+ "|<EXTRA_TOKENS_44>|": 151690,
367
+ "|<EXTRA_TOKENS_45>|": 151691,
368
+ "|<EXTRA_TOKENS_46>|": 151692,
369
+ "|<EXTRA_TOKENS_47>|": 151693,
370
+ "|<EXTRA_TOKENS_48>|": 151694,
371
+ "|<EXTRA_TOKENS_49>|": 151695,
372
+ "|<EXTRA_TOKENS_4>|": 151650,
373
+ "|<EXTRA_TOKENS_50>|": 151696,
374
+ "|<EXTRA_TOKENS_51>|": 151697,
375
+ "|<EXTRA_TOKENS_52>|": 151698,
376
+ "|<EXTRA_TOKENS_53>|": 151699,
377
+ "|<EXTRA_TOKENS_54>|": 151700,
378
+ "|<EXTRA_TOKENS_55>|": 151701,
379
+ "|<EXTRA_TOKENS_56>|": 151702,
380
+ "|<EXTRA_TOKENS_57>|": 151703,
381
+ "|<EXTRA_TOKENS_58>|": 151704,
382
+ "|<EXTRA_TOKENS_59>|": 151705,
383
+ "|<EXTRA_TOKENS_5>|": 151651,
384
+ "|<EXTRA_TOKENS_60>|": 151706,
385
+ "|<EXTRA_TOKENS_61>|": 151707,
386
+ "|<EXTRA_TOKENS_62>|": 151708,
387
+ "|<EXTRA_TOKENS_63>|": 151709,
388
+ "|<EXTRA_TOKENS_64>|": 151710,
389
+ "|<EXTRA_TOKENS_65>|": 151711,
390
+ "|<EXTRA_TOKENS_66>|": 151712,
391
+ "|<EXTRA_TOKENS_67>|": 151713,
392
+ "|<EXTRA_TOKENS_68>|": 151714,
393
+ "|<EXTRA_TOKENS_69>|": 151715,
394
+ "|<EXTRA_TOKENS_6>|": 151652,
395
+ "|<EXTRA_TOKENS_70>|": 151716,
396
+ "|<EXTRA_TOKENS_71>|": 151717,
397
+ "|<EXTRA_TOKENS_72>|": 151718,
398
+ "|<EXTRA_TOKENS_73>|": 151719,
399
+ "|<EXTRA_TOKENS_74>|": 151720,
400
+ "|<EXTRA_TOKENS_75>|": 151721,
401
+ "|<EXTRA_TOKENS_76>|": 151722,
402
+ "|<EXTRA_TOKENS_77>|": 151723,
403
+ "|<EXTRA_TOKENS_78>|": 151724,
404
+ "|<EXTRA_TOKENS_79>|": 151725,
405
+ "|<EXTRA_TOKENS_7>|": 151653,
406
+ "|<EXTRA_TOKENS_80>|": 151726,
407
+ "|<EXTRA_TOKENS_81>|": 151727,
408
+ "|<EXTRA_TOKENS_82>|": 151728,
409
+ "|<EXTRA_TOKENS_83>|": 151729,
410
+ "|<EXTRA_TOKENS_84>|": 151730,
411
+ "|<EXTRA_TOKENS_85>|": 151731,
412
+ "|<EXTRA_TOKENS_86>|": 151732,
413
+ "|<EXTRA_TOKENS_87>|": 151733,
414
+ "|<EXTRA_TOKENS_88>|": 151734,
415
+ "|<EXTRA_TOKENS_89>|": 151735,
416
+ "|<EXTRA_TOKENS_8>|": 151654,
417
+ "|<EXTRA_TOKENS_90>|": 151736,
418
+ "|<EXTRA_TOKENS_91>|": 151737,
419
+ "|<EXTRA_TOKENS_92>|": 151738,
420
+ "|<EXTRA_TOKENS_93>|": 151739,
421
+ "|<EXTRA_TOKENS_94>|": 151740,
422
+ "|<EXTRA_TOKENS_95>|": 151741,
423
+ "|<EXTRA_TOKENS_96>|": 151742,
424
+ "|<EXTRA_TOKENS_97>|": 151743,
425
+ "|<EXTRA_TOKENS_98>|": 151744,
426
+ "|<EXTRA_TOKENS_99>|": 151745,
427
+ "|<EXTRA_TOKENS_9>|": 151655
428
+ }
config.json ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "cyan2k/molmo-7B-D-bnb-4bit",
3
+ "architectures": [
4
+ "MolmoForCausalLM"
5
+ ],
6
+ "attention_layer_norm": false,
7
+ "auto_map": {
8
+ "AutoConfig": "config_molmo.MolmoConfig",
9
+ "AutoModelForCausalLM": "modeling_molmo.MolmoForCausalLM"
10
+ },
11
+ "clip_qkv": null,
12
+ "embedding_size": 152064,
13
+ "hidden_size": 3584,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 37888,
16
+ "layer_norm_eps": 1e-06,
17
+ "layer_norm_type": "rms",
18
+ "max_position_embeddings": 4096,
19
+ "model_type": "molmo",
20
+ "norm_after": false,
21
+ "num_attention_heads": 28,
22
+ "num_hidden_layers": 28,
23
+ "num_key_value_heads": 4,
24
+ "qkv_bias": true,
25
+ "quantization_config": {
26
+ "_load_in_4bit": true,
27
+ "_load_in_8bit": false,
28
+ "bnb_4bit_compute_dtype": "float32",
29
+ "bnb_4bit_quant_storage": "uint8",
30
+ "bnb_4bit_quant_type": "fp4",
31
+ "bnb_4bit_use_double_quant": true,
32
+ "llm_int8_enable_fp32_cpu_offload": false,
33
+ "llm_int8_has_fp16_weight": false,
34
+ "llm_int8_skip_modules": null,
35
+ "llm_int8_threshold": 6.0,
36
+ "load_in_4bit": true,
37
+ "load_in_8bit": false,
38
+ "quant_method": "bitsandbytes"
39
+ },
40
+ "rope_theta": 1000000.0,
41
+ "tie_word_embeddings": false,
42
+ "torch_dtype": "float32",
43
+ "transformers_version": "4.44.0",
44
+ "use_cache": true,
45
+ "use_position_ids": true,
46
+ "vocab_size": 152064,
47
+ "weight_tying": false
48
+ }
config_molmo.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from transformers import PretrainedConfig, AutoTokenizer
4
+
5
+
6
class MolmoConfig(PretrainedConfig):
    """Configuration object for models whose ``model_type`` is ``"molmo"``.

    Every constructor argument is stored on the instance under the same
    name. ``tie_word_embeddings`` is additionally forwarded to
    ``PretrainedConfig.__init__`` together with any extra keyword arguments.
    """

    model_type = "molmo"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=50304,
        embedding_size=50304,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        max_position_embeddings=2048,
        initializer_range=0.02,
        use_cache=True,
        layer_norm_eps: float = 1e-5,
        rope_theta=10000.0,
        clip_qkv=None,
        qkv_bias: bool = False,
        weight_tying: bool = False,
        use_position_ids: bool = True,
        tie_word_embeddings: bool = True,
        attention_layer_norm: bool = False,
        norm_after: bool = False,
        layer_norm_type: str = "rms",
        **kwargs,
    ):
        # Vocabulary / embedding table sizes and sequence length.
        self.vocab_size = vocab_size
        self.embedding_size = embedding_size
        self.max_position_embeddings = max_position_embeddings

        # Transformer dimensions.
        self.hidden_size = hidden_size
        self.intermediate_size = intermediate_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads

        # Normalisation and assorted feature switches.
        self.layer_norm_eps = layer_norm_eps
        self.weight_tying = weight_tying
        self.use_position_ids = use_position_ids
        self.attention_layer_norm = attention_layer_norm
        self.num_key_value_heads = num_key_value_heads
        self.initializer_range = initializer_range
        self.use_cache = use_cache
        self.rope_theta = rope_theta
        self.clip_qkv = clip_qkv
        self.qkv_bias = qkv_bias
        self.norm_after = norm_after
        self.tie_word_embeddings = tie_word_embeddings
        self.layer_norm_type = layer_norm_type

        # The base class receives tie_word_embeddings explicitly; everything
        # the caller passed beyond the named arguments goes along unchanged.
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)


# Module-level side effect: register this config class for the Auto* loaders.
MolmoConfig.register_for_auto_class()
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.44.0"
4
+ }
image_preprocessing_molmo.py ADDED
@@ -0,0 +1,569 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image processor class for Molmo"""
2
+ from typing import List, Optional, Union, Mapping
3
+
4
+ import numpy as np
5
+ import einops
6
+ import torch
7
+ import torchvision.transforms
8
+ from torchvision.transforms import InterpolationMode
9
+ from torchvision.transforms.functional import convert_image_dtype
10
+
11
+ from transformers.image_utils import (
12
+ OPENAI_CLIP_MEAN,
13
+ OPENAI_CLIP_STD,
14
+ ImageInput,
15
+ is_valid_image,
16
+ )
17
+ from transformers.processing_utils import ImagesKwargs
18
+ from transformers.image_processing_utils import BaseImageProcessor, BatchFeature
19
+ from transformers.utils import TensorType, is_vision_available, logging
20
+
21
+
22
+ logger = logging.get_logger(__name__)
23
+
24
+
25
def make_batched_images(images) -> List[ImageInput]:
    """
    Accepts images in list or nested list format, and makes a flat list of images for preprocessing.

    Args:
        images (`Union[List[List[ImageInput]], List[ImageInput], ImageInput]`):
            A single image, a list/tuple of images, or a list/tuple of lists/tuples of images.

    Returns:
        list: A flat list of images. A nested input is flattened, a flat list/tuple is
        returned as-is, and a single image is wrapped in a one-element list.

    Raises:
        ValueError: If `images` is empty, or is not one of the supported layouts.
    """
    is_sequence = isinstance(images, (list, tuple))

    # An empty sequence has no first element to inspect; fail with the same
    # descriptive error as any other unsupported input instead of an IndexError.
    if is_sequence and len(images) == 0:
        raise ValueError(f"Could not make batched images from {images}")

    if (
        is_sequence
        and isinstance(images[0], (list, tuple))
        and len(images[0]) > 0  # guard the images[0][0] lookup below
        and is_valid_image(images[0][0])
    ):
        # Nested layout: flatten one level.
        return [img for img_list in images for img in img_list]

    elif is_sequence and is_valid_image(images[0]):
        return images

    elif is_valid_image(images):
        return [images]

    raise ValueError(f"Could not make batched images from {images}")
46
+
47
+
48
def pad_to_bounding_box(
    image, offset_height, offset_width, target_height,
    target_width, value=0
):
    """Pad `image` with a constant so it sits inside a larger canvas.

    Args:
        image: array whose first two axes are (height, width); the padding
            spec has three entries, so the array must have three axes.
        offset_height: rows of padding added before the image.
        offset_width: columns of padding added before the image.
        target_height: total number of rows in the result.
        target_width: total number of columns in the result.
        value: constant used for every padded element.

    Returns:
        The padded array produced by `np.pad`; the third axis is not padded.
    """
    rows, cols = image.shape[:2]
    # Whatever is left after the leading offset and the image itself goes at the end.
    pad_bottom = target_height - offset_height - rows
    pad_right = target_width - offset_width - cols
    pad_spec = [
        [offset_height, pad_bottom],
        [offset_width, pad_right],
        [0, 0],
    ]
    return np.pad(image, pad_spec, constant_values=value)
60
+
61
+
62
def normalize_image(image, offset, scale):
    """Subtract `offset` and divide by `scale` along the last axis, in place.

    `offset` and `scale` are converted to float32 vectors and broadcast over
    the first two axes of `image`. The input array itself is modified and
    returned.
    """
    shift = np.array(offset, dtype=np.float32)[np.newaxis, np.newaxis, :]
    divisor = np.array(scale, dtype=np.float32)[np.newaxis, np.newaxis, :]
    # In-place operators on purpose: the caller's array is updated.
    image -= shift
    image /= divisor
    return image
66
+
67
+
68
def resize_and_pad(
    image,
    desired_output_size,
    resize_method=InterpolationMode.BILINEAR,
    pad_value=0,
    normalize=True,
    image_mean=OPENAI_CLIP_MEAN,
    image_std=OPENAI_CLIP_STD,
):
    """Resize an image to fit inside `desired_output_size`, then centre-pad it.

    The image is scaled by a single factor (aspect ratio preserved) so that it
    fits within the requested size, resized with antialiased bilinear
    interpolation, clipped to [0, 1], padded to the exact requested size and
    optionally normalized.

    Args:
        image: numpy array with axes (height, width, channels).
        desired_output_size: `(height, width)` of the returned image.
        resize_method: accepted for interface compatibility; both resize
            paths below always use antialiased bilinear interpolation.
        pad_value: constant written into the padded border.
        normalize: when true, apply `normalize_image` with `image_mean` / `image_std`.
        image_mean: per-channel offset used for normalization.
        image_std: per-channel scale used for normalization.

    Returns:
        `(image, image_mask)`: the padded image and a boolean array over
        (height, width) that is True where the resized image lies and False in
        the padding.
    """
    desired_height, desired_width = desired_output_size
    height, width = image.shape[:2]

    # Cast into float32 since the training code did this in float32 and it (very rarely)
    # affects the results after rounding.
    image_scale_y = np.array(desired_height, np.float32) / np.array(height, np.float32)
    image_scale_x = np.array(desired_width, np.float32) / np.array(width, np.float32)
    image_scale = min(image_scale_x, image_scale_y)
    scaled_height = int(np.array(height, np.float32) * image_scale)
    scaled_width = int(np.array(width, np.float32) * image_scale)

    # TensorFlow is used when it is installed so existing setups keep producing
    # the same pixels; otherwise fall back to the equivalent torch/torchvision
    # path so that TensorFlow is not a hard requirement.
    try:
        import tensorflow as tf
    except ImportError:
        tf = None

    if tf is not None:
        image = tf.image.convert_image_dtype(tf.constant(image), dtype=tf.float32)
        image = tf.image.resize(
            image,
            [scaled_height, scaled_width],
            method=tf.image.ResizeMethod.BILINEAR,
            antialias=True,
        )
        image = tf.clip_by_value(image, 0.0, 1.0)
        image = image.numpy()
    else:
        # torchvision expects channels-first tensors.
        image = torch.permute(torch.from_numpy(image), [2, 0, 1])
        image = convert_image_dtype(image)  # resize in float32
        image = torchvision.transforms.Resize(
            [scaled_height, scaled_width], InterpolationMode.BILINEAR, antialias=True
        )(image)
        image = torch.clip(image, 0.0, 1.0)
        image = torch.permute(image, [1, 2, 0]).numpy()

    # Centre the resized image; any odd leftover pixel goes to the bottom/right.
    top_pad = (desired_height - scaled_height) // 2
    left_pad = (desired_width - scaled_width) // 2
    padding = [
        [top_pad, desired_height - scaled_height - top_pad],
        [left_pad, desired_width - scaled_width - left_pad],
        [0, 0]
    ]
    # The mask is padded with np.pad's default constant (0 -> False).
    image_mask = np.pad(np.ones_like(image[:, :, 0], dtype=bool), padding[:2])
    image = np.pad(image, padding, constant_values=pad_value)
    if normalize:
        image = normalize_image(image, offset=image_mean, scale=image_std)
    return image, image_mask
121
+
122
+
123
def select_tiling(h, w, patch_size, max_num_patches):
    """Decide how best to divide an image of size [h, w] into up to max_num_patches of size patch_size.

    Args:
        h: image height.
        w: image width.
        patch_size: side length of one (square) patch.
        max_num_patches: upper bound on rows * columns of the tiling.

    Returns:
        int32 numpy array `[rows, cols]` with `rows * cols <= max_num_patches`.
        If every candidate would require downscaling the image, the candidate
        needing the least downscaling is returned; otherwise the candidate
        needing the least upscaling. Ties go to the tiling with fewer patches,
        then to the one with fewer rows.
    """
    # Every (rows, cols) pair whose product fits the budget.
    tilings = [
        (i, j)
        for i in range(1, max_num_patches + 1)
        for j in range(1, max_num_patches // i + 1)
    ]
    # sort so argmin and argmax favour smaller tilings in the event of a tie
    tilings.sort(key=lambda x: (x[0] * x[1], x[0]))
    candidate_tilings = np.array(tilings, dtype=np.int32)  # [n_resolutions, 2]
    candidate_resolutions = candidate_tilings * patch_size  # [n_resolutions, 2]

    # How much we would need to scale the image to fit exactly in each tiling.
    # np.array is used rather than np.stack(..., dtype=...) because the dtype
    # argument of np.stack only exists in NumPy >= 1.24.
    original_size = np.array([h, w], dtype=np.float32)  # [2]
    required_scale_d = candidate_resolutions.astype(np.float32) / original_size
    required_scale = np.min(required_scale_d, axis=-1, keepdims=True)  # [n_resolutions, 1]
    if np.all(required_scale < 1):
        # We are forced to downscale, so try to minimize the amount of downscaling
        ix = np.argmax(required_scale)
    else:
        # Pick the resolution that required the least upscaling so that it most closely fits the image
        required_scale = np.where(required_scale < 1.0, 10e9, required_scale)
        ix = np.argmin(required_scale)
    return candidate_tilings[ix]
149
+
150
+
151
class MolmoImagesKwargs(ImagesKwargs, total=False):
    """Optional image keyword arguments, extending `ImagesKwargs`.

    The keys use the same names as the first seven constructor parameters of
    `MolmoImageProcessor`; because of `total=False` none of them is required.
    """

    # Cropping / tiling options.
    max_crops: Optional[int]
    overlap_margins: Optional[List[int]]
    base_image_input_size: Optional[List[int]]

    # Token grid and patch options.
    image_token_length_w: Optional[int]
    image_token_length_h: Optional[int]
    image_patch_size: Optional[int]

    # Whether a padding mask is requested.
    image_padding_mask: Optional[bool]
159
+
160
+
161
class MolmoImageProcessor(BaseImageProcessor):
    """Preprocess images and multi-modal inputs.

    Each image is turned into a set of overlapping fixed-size crops plus one
    resized "global" view, together with the special-token sequence that marks
    where the image features are inserted into the text token stream.
    """

    def __init__(
        self,
        max_crops: int = 12,
        overlap_margins: List[int] = (4, 4),
        base_image_input_size: List[int] = (336, 336),
        image_token_length_w: int = 12,
        image_token_length_h: int = 12,
        image_patch_size: int = 14,
        image_padding_mask: bool = True,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        **kwargs,
    ):
        """Store the default preprocessing configuration.

        `image_mean` / `image_std` fall back to `OPENAI_CLIP_MEAN` /
        `OPENAI_CLIP_STD` when not given. Remaining keyword arguments are
        forwarded to `BaseImageProcessor.__init__`.
        """
        super().__init__(**kwargs)
        self.max_crops = max_crops
        self.overlap_margins = overlap_margins
        self.base_image_input_size = base_image_input_size
        self.image_token_length_w = image_token_length_w
        self.image_token_length_h = image_token_length_h
        self.image_patch_size = image_patch_size
        self.image_padding_mask = image_padding_mask
        self.do_normalize = do_normalize
        self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
        self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD

    def image_to_patches_and_tokens(
        self,
        image: ImageInput,
        image_patch_token_id: int,
        image_col_token_id: int,
        image_start_token_id: int,
        image_end_token_id: int,
        max_crops: Optional[int] = None,
        overlap_margins: Optional[List[int]] = None,
        base_image_input_size: Optional[Union[int, List[int]]] = None,
        image_token_length_w: Optional[int] = None,
        image_token_length_h: Optional[int] = None,
        image_patch_size: Optional[int] = None,
    ):
        """Preprocesses an image

        All configuration arguments must be concrete values here; defaults are
        resolved by `preprocess` before this method is called.

        Returns:
            crops: (n_crops, n_patches, patch_dim) individual crops, `n_crops` might
                   change between images but the other dimensions are fixed. The
                   resized global image is the first crop.
            tokens: (n_tokens,) tokens, patch tokens indicating where to insert the
                    patch features, plus start/end/column special tokens
            patch_ordering: flat array giving the order image features should be inserted
                            into the `tokens`, negative values indicate patch features to exclude
            padding_mask: per-patch fraction taken from the resize/pad mask for each
                          high-resolution crop, followed by one extra row filled with -1
        """
        if isinstance(base_image_input_size, int):
            base_image_input_size = (base_image_input_size, base_image_input_size)

        base_image_input_d = image_patch_size
        tokens_per_image = image_token_length_w * image_token_length_h
        image_base_patch_w = base_image_input_size[1] // base_image_input_d
        image_base_patch_h = base_image_input_size[0] // base_image_input_d

        original_image_h, original_image_w = image.shape[:2]
        crop_size = base_image_input_size[0]

        # Discard this many patches from the (left/top, right/bottom) of crops
        left_margin, right_margin = overlap_margins
        assert left_margin % 2 == 0  # Required for compatibility with 2x2 pooling
        total_margin_pixels = base_image_input_d*(right_margin + left_margin)  # pixels removed per dim
        crop_patches = base_image_input_size[0] // base_image_input_d  # patches per crop dim
        crop_window_patches = crop_patches - (right_margin + left_margin)  # usable patches
        crop_window_size = crop_window_patches * base_image_input_d
        tiling = select_tiling(
            original_image_h - total_margin_pixels,
            original_image_w - total_margin_pixels,
            crop_window_size,
            max_crops
        )
        src, img_mask = resize_and_pad(
            image,
            [tiling[0]*crop_window_size+total_margin_pixels, tiling[1]*crop_window_size+total_margin_pixels]
        )

        # Now we have to split the image into crops, while keeping track of how each patch in
        # each crop should be ordered in the global image; this requires a lot of tricky bookkeeping
        patches_arr = []
        mask_arr = []
        patch_ordering_arr = []

        # We assume 2x2 pooling, but can allow padding the right/bottom with extra
        # patches if the number of patches per side is not even
        assert (crop_patches+1)//2 == image_token_length_h
        assert (crop_patches+1)//2 == image_token_length_w
        on = 0  # running index of the next pooled patch feature
        for i in range(tiling[0]):
            y0 = i*crop_window_size
            if i == 0:
                crop_y0 = 0
            else:
                crop_y0 = left_margin // 2

            # Only the first/last row of crops keeps its outer margin
            crop_h = image_base_patch_h - (right_margin + left_margin)
            if i == 0:
                crop_h += left_margin
            if i == (tiling[0]-1):
                crop_h += right_margin
            for j in range(tiling[1]):
                x0 = j*crop_window_size
                if j == 0:
                    crop_x0 = 0
                else:
                    crop_x0 = left_margin // 2

                # Only the first/last column of crops keeps its outer margin
                crop_w = image_base_patch_w - (right_margin + left_margin)
                if j == 0:
                    crop_w += left_margin
                if j == (tiling[1]-1):
                    crop_w += right_margin

                pooled_w = (crop_w + 1) // 2
                pooled_h = (crop_h + 1) // 2
                patch_ordering_arr.append(
                    pad_to_bounding_box(
                        np.reshape(np.arange(on, on+pooled_h*pooled_w, dtype=np.int32), (pooled_h, pooled_w, 1)),
                        crop_y0, crop_x0, image_token_length_h, image_token_length_w, value=-1
                    )[:, :, 0]
                )
                patches_arr.append(src[y0:y0+crop_size, x0:x0+crop_size])
                mask_arr.append(img_mask[y0:y0+crop_size, x0:x0+crop_size])

                on += pooled_h*pooled_w
        patches = np.stack(patches_arr)
        patch_ordering = np.stack(patch_ordering_arr)
        img_mask = np.stack(mask_arr)

        # Switch to [n_crops, n_patches, pixels_per_patch] format
        patches = einops.rearrange(
            patches, 'p (h dh) (w dw) c -> p (h w) (dh dw c)',
            dh=base_image_input_d,
            dw=base_image_input_d,
            h=image_base_patch_h,
            w=image_base_patch_w
        )
        img_mask = einops.rearrange(
            img_mask, 'p (h dh) (w dw) -> p (h w) (dh dw)',
            dh=base_image_input_d,
            dw=base_image_input_d,
            h=image_base_patch_h,
            w=image_base_patch_w
        )

        # Average the mask over each patch's pixels
        img_mask = img_mask.astype(np.float32).mean(axis=-1)
        patch_ordering = np.reshape(patch_ordering, [-1])
        valid = patch_ordering >= 0

        # Transpose order, to get left-to-right order instead of crop-by-crop order
        patch_ordering_rh = np.reshape(
            patch_ordering,
            [tiling[0], tiling[1], image_token_length_h, image_token_length_w]
        )
        patch_ordering_rh = np.transpose(patch_ordering_rh, [0, 2, 1, 3])
        patch_ordering_rh = np.reshape(patch_ordering_rh, [-1])

        # The transpose will screw up which patches are masked, project the
        # new order into sparse structure of `patch_ordering` to fix this
        patch_ordering[valid] = patch_ordering_rh[patch_ordering_rh >= 0]

        # Now build the output tokens
        h = tiling[0] * crop_window_patches + (right_margin+left_margin)
        w = tiling[1] * crop_window_patches + (right_margin+left_margin)
        per_row = np.full(
            ((w+1)//2,),
            image_patch_token_id,
        )
        per_row = np.concatenate([per_row, [image_col_token_id]], 0)

        joint = np.tile(per_row, [(h+1)//2])
        joint = [
            [image_start_token_id],
            joint,
            [image_end_token_id]
        ]

        # Finally do the same for the global image
        resized, _ = resize_and_pad(image, base_image_input_size)
        resized = einops.rearrange(
            resized, '(h dh) (w dw) c -> (h w) (dh dw c)',
            dh=base_image_input_d,
            dw=base_image_input_d,
            h=image_base_patch_h,
            w=image_base_patch_w
        )
        patches = np.concatenate([np.expand_dims(resized, 0), patches], 0)

        # Global image goes first, so the order of patches in previous crops gets increased
        patch_ordering = np.where(
            patch_ordering >= 0,
            patch_ordering + tokens_per_image,
            -1
        )
        patch_ordering = np.concatenate([np.arange(0, tokens_per_image), patch_ordering], 0)
        per_row = np.full(
            (image_token_length_w,),
            image_patch_token_id,
        )
        per_row = np.concatenate([per_row, [image_col_token_id]], 0)
        extra_tokens = np.tile(per_row, [image_token_length_h])
        joint = [
            [image_start_token_id],
            extra_tokens,
            [image_end_token_id],
        ] + joint

        joint = np.concatenate(joint, 0)
        # NOTE(review): the global crop is prepended to `patches` above, while the
        # extra -1 mask row is appended at the end here -- confirm this ordering is
        # what the model expects before changing it.
        img_mask = np.pad(img_mask, [[0, 1], [0, 0]], constant_values=-1)
        return patches, joint, patch_ordering, img_mask

    def build_image_input_idx(
        self,
        image_tokens: np.ndarray,
        patch_order: np.ndarray,
        image_patch_token_id: int,
        no_image: Optional[bool] = None,
        image_token_length_w: Optional[int] = None,
        image_token_length_h: Optional[int] = None,
    ):
        """Converts `patch_order` into a mapping of token_id -> patch_id

        Returns an int array of positions inside `image_tokens`. When
        `patch_order` is given the result is reshaped to
        (-1, image_token_length_w * image_token_length_h) and excluded patches
        are marked with -100. When `no_image` is truthy an empty
        (0, tokens_per_image) array is returned.
        """
        tokens_per_image = image_token_length_w * image_token_length_h
        if no_image is not None and no_image:
            return np.zeros((0, tokens_per_image), np.int32)

        # Indices to insert the patches
        image_input_idx = image_tokens == image_patch_token_id
        image_input_idx = np.nonzero(image_input_idx)[0].astype(np.int32)

        if patch_order is not None:
            n_tokens = image_input_idx.shape[0]
            patch_order = np.reshape(patch_order, [-1])

            valid = patch_order >= 0
            n_valid_patches = valid.sum()
            assert len(image_input_idx) == n_valid_patches

            # Invert the permutation described by the valid entries of `patch_order`
            sorted_patch_ixs = np.zeros([n_tokens], np.int32)
            sorted_patch_ixs[patch_order[valid]] = np.arange(n_valid_patches, dtype=np.int32)

            # Project the inverted mapping into same sparse structure
            sorted_patch_ixs_ex = np.full(np.shape(patch_order), -1)
            sorted_patch_ixs_ex[valid] = sorted_patch_ixs

            # Do the gather and then re-mask outputs that were masked in `sorted_patch_ixs`
            valid = (sorted_patch_ixs_ex >= 0).astype(np.int32)
            image_input_idx = image_input_idx[sorted_patch_ixs_ex*valid]
            image_input_idx = image_input_idx*valid - 100*(1 - valid)
            image_input_idx = np.reshape(image_input_idx, [-1, tokens_per_image])
        return image_input_idx

    def preprocess(
        self,
        image: np.ndarray,
        image_patch_token_id: int,
        image_col_token_id: int,
        image_start_token_id: int,
        image_end_token_id: int,
        max_crops: Optional[int] = None,
        overlap_margins: Optional[List[int]] = None,
        base_image_input_size: Optional[Union[int, List[int]]] = None,
        image_token_length_w: Optional[int] = None,
        image_token_length_h: Optional[int] = None,
        image_patch_size: Optional[int] = None,
        **kwargs,
    ):
        """Preprocesses a single image

        Falsy configuration arguments fall back to the values stored on the
        processor; extra keyword arguments are accepted and ignored.

        Returns:
            (crops, image_tokens, patch_idx, img_mask) where `patch_idx` is the
            output of `build_image_input_idx` for this image.
        """
        max_crops = max_crops or self.max_crops
        overlap_margins = overlap_margins or self.overlap_margins
        base_image_input_size = base_image_input_size or self.base_image_input_size
        image_token_length_w = image_token_length_w or self.image_token_length_w
        image_token_length_h = image_token_length_h or self.image_token_length_h
        image_patch_size = image_patch_size or self.image_patch_size

        crops, image_tokens, patch_ordering, img_mask = self.image_to_patches_and_tokens(
            image,
            image_patch_token_id,
            image_col_token_id,
            image_start_token_id,
            image_end_token_id,
            max_crops,
            overlap_margins,
            base_image_input_size,
            image_token_length_w,
            image_token_length_h,
            image_patch_size,
        )
        patch_idx = self.build_image_input_idx(
            image_tokens,
            patch_ordering,
            image_patch_token_id,
            image_token_length_w=image_token_length_w,
            image_token_length_h=image_token_length_h,
        )
        return crops, image_tokens, patch_idx, img_mask

    def multimodal_preprocess(
        self,
        images: np.ndarray,
        tokens: List[int],
        image_idx: np.ndarray,
        sequence_length: int,
        image_patch_token_id: int,
        image_col_token_id: int,
        image_start_token_id: int,
        image_end_token_id: int,
        **kwargs,
    ):
        """Merge images and text tokens into multi-modal features for the model

        :param images: images to use as input, or None for text-only input
        :param tokens: input text tokens
        :param image_idx: where to insert the images into `tokens`; -1 inserts the
            image at the very start, otherwise the token at that index is replaced
        :param sequence_length: accepted for interface compatibility; not used here
        :param image_patch_token_id: id to use for tokens that will contain image features
        :param image_col_token_id: token id for image column special tokens
        :param image_start_token_id: token id for image start special tokens
        :param image_end_token_id: token id for image end special tokens
        :param kwargs: override preprocessor default args
        :return: dict with "input_ids", "images", "image_input_idx" and, when the
            padding mask is enabled, "image_masks"
        """
        # An explicit `image_padding_mask=False` override must be honoured, so only
        # fall back to the instance default when the override is absent/None.
        image_padding_mask = kwargs.get("image_padding_mask")
        if image_padding_mask is None:
            image_padding_mask = self.image_padding_mask

        if images is None:
            return {
                "input_ids": tokens,
                "images": None,
                "image_input_idx": None
            }

        n = len(images)
        all_crops = []
        all_image_idx = []
        out_tokens = []
        all_crop_masks = []
        n_out_tokens = 0  # number of tokens already placed in the output sequence

        for ix in range(n):
            token_ix = image_idx[ix]
            crops, image_tokens, patch_idx, img_mask = self.preprocess(
                images[ix],
                image_patch_token_id,
                image_col_token_id,
                image_start_token_id,
                image_end_token_id,
                **kwargs,
            )

            if token_ix == -1:  # -1 is an image inserted at the very start
                start = 0
                token_ix = 0
                end = 0
            else:
                start = 0 if ix == 0 else image_idx[ix-1] + 1
                end = token_ix + 1

            text_before = tokens[start:token_ix]
            # Position of this image's first token in the merged sequence. Using
            # the running output length keeps indices correct when more than one
            # image is inserted.
            offset = n_out_tokens + len(text_before)
            # Negative entries mark excluded patches and must stay negative.
            all_image_idx.append(np.where(patch_idx < 0, patch_idx, patch_idx + offset))
            all_crops.append(crops)
            out_tokens.append(text_before)
            out_tokens.append(image_tokens)
            n_out_tokens = offset + len(image_tokens)
            if ix == (n - 1):
                out_tokens.append(tokens[end:])
            if image_padding_mask:
                all_crop_masks.append(img_mask)

        input_ids = np.concatenate(out_tokens, 0)
        images = np.concatenate(all_crops, 0)
        image_input_idx = np.concatenate(all_image_idx, 0)
        if image_padding_mask:
            image_masks = np.concatenate(all_crop_masks, 0)
        else:
            image_masks = None

        out = {
            "input_ids": input_ids,
            "images": images,
            "image_input_idx": image_input_idx
        }
        if image_masks is not None:
            out["image_masks"] = image_masks
        return out


MolmoImageProcessor.register_for_auto_class()
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8f08d55da7152a5d2cfdbcecab6862e5c27d6b0e0eaf931675bf4caf9e8808c
3
+ size 4998427912
model-00002-of-00002.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c2f47912826c850d5387d8ef60990d9232408cd8302cb553ac3fefe98a0d9ee
3
+ size 2943573752
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
modeling_molmo.py ADDED
The diff for this file is too large to render. See raw diff
 
preprocessing_molmo.py ADDED
@@ -0,0 +1,173 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Processor class for Molmo.
3
+ """
4
+
5
+ from typing import Optional
6
+
7
+ try:
8
+ from typing import Unpack
9
+ except ImportError:
10
+ from typing_extensions import Unpack
11
+
12
+ import numpy as np
13
+ import torch
14
+
15
+ from transformers.image_utils import ImageInput
16
+ from transformers.processing_utils import (
17
+ TextKwargs,
18
+ ProcessingKwargs,
19
+ ProcessorMixin,
20
+ )
21
+
22
+ from transformers.tokenization_utils_base import TextInput
23
+ from transformers.utils import logging
24
+
25
+ from transformers import AutoTokenizer
26
+ from .image_preprocessing_molmo import MolmoImagesKwargs, make_batched_images, MolmoImageProcessor
27
+
28
+
29
+ logger = logging.get_logger(__name__)
30
+
31
+
32
# Special tokens used to lay out image features inside the token stream.
# (Plain string literals: the original `f` prefixes had no placeholders.)
DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
DEFAULT_IM_START_TOKEN = "<im_start>"
DEFAULT_IM_END_TOKEN = "<im_end>"
DEFAULT_IM_COL_TOKEN = "<im_col>"
IMAGE_PROMPT = "<|image|>"

# Order matters: `get_special_token_ids` zips this tuple with the encoded ids.
EXTRA_TOKENS = (
    DEFAULT_IM_START_TOKEN,
    DEFAULT_IM_END_TOKEN,
    DEFAULT_IMAGE_PATCH_TOKEN,
    DEFAULT_IM_COL_TOKEN,
    IMAGE_PROMPT,
)
39
+
40
+
41
def get_special_token_ids(tokenizer):
    """Map each entry of `EXTRA_TOKENS` to the id the tokenizer assigns it.

    The tokens are concatenated and encoded in one call (without adding
    special tokens); the assertion checks that this yields exactly one id
    per token.
    """
    joined = "".join(EXTRA_TOKENS)
    token_ids = tokenizer.encode(joined, add_special_tokens=False)
    assert len(token_ids) == len(EXTRA_TOKENS)
    return dict(zip(EXTRA_TOKENS, token_ids))
45
+
46
+
47
class MolmoTextKwargs(TextKwargs, total=False):
    """Optional text-side keyword arguments understood by `MolmoProcessor`.

    All keys are optional (`total=False`); defaults are supplied by
    `MolmoProcessorKwargs._defaults`.
    """

    # Prompt construction options
    style: Optional[str]
    system_prompt: Optional[str]
    message_format: Optional[str]
    always_start_with_space: Optional[bool]

    # Sequence length forwarded to the image processor
    sequence_length: Optional[int]
53
+
54
+
55
class MolmoProcessorKwargs(ProcessingKwargs, total=False):
    """Grouped keyword arguments for `MolmoProcessor.process`.

    `_defaults` provides the values used when the caller supplies no override;
    the image defaults match the constructor defaults of `MolmoImageProcessor`.
    """

    text_kwargs: MolmoTextKwargs
    images_kwargs: MolmoImagesKwargs

    _defaults = {
        # Defaults for the image processor
        "images_kwargs": {
            "max_crops": 12,
            "overlap_margins": [4, 4],
            "base_image_input_size": [336, 336],
            "image_token_length_w": 12,
            "image_token_length_h": 12,
            "image_patch_size": 14,
            "image_padding_mask": True,
        },
        # Defaults for prompt construction / tokenization
        "text_kwargs": {
            "style": "long_caption",
            "system_prompt": "none",
            "message_format": "role",
            "always_start_with_space": True,
            "sequence_length": 1536,
            "padding": False,
        },
    }
77
+
78
+
79
class MolmoProcessor(ProcessorMixin):
    """Combines a `MolmoImageProcessor` and a tokenizer into one processor.

    `process` builds the text prompt, merges image tokens into it through the
    image processor, prepends a BOS token and returns torch tensors.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "AutoImageProcessor"
    tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")

    def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer : AutoTokenizer = None, **kwargs):
        """Register the image processor and tokenizer; extra kwargs are ignored."""
        super().__init__(image_processor, tokenizer)
        # Filled lazily by `special_token_ids`
        self._special_tokens = None

    @property
    def special_token_ids(self):
        """Mapping of special token string -> token id, computed once and cached."""
        if self._special_tokens is None:
            self._special_tokens = get_special_token_ids(self.tokenizer)
        return self._special_tokens

    def get_tokens_input(self, prompt, message_format, always_start_with_space):
        """Format `prompt` and encode it without special tokens.

        :param prompt: the raw user text
        :param message_format: "role" wraps the prompt as "User: ... Assistant:";
            "none" or None leaves it unchanged
        :param always_start_with_space: when truthy, a leading space is prepended
        :raises NotImplementedError: for any other `message_format`
        :return: list of token ids produced by the tokenizer
        """
        if message_format == "none" or message_format is None:
            pass
        elif message_format == "role":
            prompt = "User: " + prompt + " Assistant:"
        else:
            raise NotImplementedError(f"Message format {message_format} not implemented")

        if always_start_with_space:
            prompt = " " + prompt

        tokens = self.tokenizer.encode(prompt, add_special_tokens=False)

        return tokens

    def process(
        self,
        text: TextInput = None,
        images: ImageInput = None,
        **kwargs: Unpack[MolmoProcessorKwargs],
    ):
        """Build model inputs from a text prompt and optional images.

        :param text: the user prompt
        :param images: one image or a batch of images, or None for text-only input
        :param kwargs: overrides merged with `MolmoProcessorKwargs` defaults
        :return: dict of torch tensors; always contains "input_ids" and, when
            images are given, the image entries produced by the image processor.
            Entries that are None (text-only input) are omitted.
        """
        output_kwargs = self._merge_kwargs(
            MolmoProcessorKwargs,
            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
            **kwargs,
        )

        tokens = self.get_tokens_input(
            text,
            output_kwargs["text_kwargs"]["message_format"],
            output_kwargs["text_kwargs"]["always_start_with_space"],
        )

        if images is not None:
            images = make_batched_images(images)
            images = [np.array(image).astype(np.uint8) for image in images]
            # For now only support inserting images at the start
            image_idx = [-1]*len(images)
        else:
            image_idx = None

        sequence_length = output_kwargs["text_kwargs"]["sequence_length"]

        image_patch_token_id = self.special_token_ids[DEFAULT_IMAGE_PATCH_TOKEN]
        image_col_token_id = self.special_token_ids[DEFAULT_IM_COL_TOKEN]
        image_start_token_id = self.special_token_ids[DEFAULT_IM_START_TOKEN]
        image_end_token_id = self.special_token_ids[DEFAULT_IM_END_TOKEN]
        out = self.image_processor.multimodal_preprocess(
            images=images,
            image_idx=image_idx,
            tokens=np.asarray(tokens).astype(np.int32),
            sequence_length=sequence_length,
            image_patch_token_id=image_patch_token_id,
            image_col_token_id=image_col_token_id,
            image_start_token_id=image_start_token_id,
            image_end_token_id=image_end_token_id,
            **output_kwargs["images_kwargs"]
        )

        # Prepend BOS
        # qwen2 and olmo do not have a BOS, and instead use EOS as a generic separator token.
        # Compare against None so that a legitimate BOS id of 0 is not discarded.
        bos = self.tokenizer.bos_token_id
        if bos is None:
            bos = self.tokenizer.eos_token_id
        decoder_input_tokens = np.pad(out["input_ids"], [[1, 0]], constant_values=bos)
        out["input_ids"] = decoder_input_tokens
        image_input_idx = out.get("image_input_idx")
        if image_input_idx is not None:
            # Shift patch mapping up by one since we added BOS
            out["image_input_idx"] = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)

        # Text-only input yields None image entries; drop them instead of
        # passing None to `torch.from_numpy`.
        out = {k: torch.from_numpy(v) for k, v in out.items() if v is not None}

        return out


MolmoProcessor.register_for_auto_class()
preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "image_preprocessing_molmo.MolmoImageProcessor",
4
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
5
+ },
6
+ "base_image_input_size": [
7
+ 336,
8
+ 336
9
+ ],
10
+ "do_normalize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_padding_mask": true,
17
+ "image_patch_size": 14,
18
+ "image_processor_type": "MolmoImageProcessor",
19
+ "image_std": [
20
+ 0.26862954,
21
+ 0.26130258,
22
+ 0.27577711
23
+ ],
24
+ "image_token_length_h": 12,
25
+ "image_token_length_w": 12,
26
+ "max_crops": 12,
27
+ "overlap_margins": [
28
+ 4,
29
+ 4
30
+ ],
31
+ "processor_class": "MolmoProcessor"
32
+ }
processor_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
4
+ },
5
+ "processor_class": "MolmoProcessor"
6
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "|<EXTRA_TOKENS_0>|",
4
+ "|<EXTRA_TOKENS_1>|",
5
+ "|<EXTRA_TOKENS_2>|",
6
+ "|<EXTRA_TOKENS_3>|",
7
+ "|<EXTRA_TOKENS_4>|",
8
+ "|<EXTRA_TOKENS_5>|",
9
+ "|<EXTRA_TOKENS_6>|",
10
+ "|<EXTRA_TOKENS_7>|",
11
+ "|<EXTRA_TOKENS_8>|",
12
+ "|<EXTRA_TOKENS_9>|",
13
+ "|<EXTRA_TOKENS_10>|",
14
+ "|<EXTRA_TOKENS_11>|",
15
+ "|<EXTRA_TOKENS_12>|",
16
+ "|<EXTRA_TOKENS_13>|",
17
+ "|<EXTRA_TOKENS_14>|",
18
+ "|<EXTRA_TOKENS_15>|",
19
+ "|<EXTRA_TOKENS_16>|",
20
+ "|<EXTRA_TOKENS_17>|",
21
+ "|<EXTRA_TOKENS_18>|",
22
+ "|<EXTRA_TOKENS_19>|",
23
+ "|<EXTRA_TOKENS_20>|",
24
+ "|<EXTRA_TOKENS_21>|",
25
+ "|<EXTRA_TOKENS_22>|",
26
+ "|<EXTRA_TOKENS_23>|",
27
+ "|<EXTRA_TOKENS_24>|",
28
+ "|<EXTRA_TOKENS_25>|",
29
+ "|<EXTRA_TOKENS_26>|",
30
+ "|<EXTRA_TOKENS_27>|",
31
+ "|<EXTRA_TOKENS_28>|",
32
+ "|<EXTRA_TOKENS_29>|",
33
+ "|<EXTRA_TOKENS_30>|",
34
+ "|<EXTRA_TOKENS_31>|",
35
+ "|<EXTRA_TOKENS_32>|",
36
+ "|<EXTRA_TOKENS_33>|",
37
+ "|<EXTRA_TOKENS_34>|",
38
+ "|<EXTRA_TOKENS_35>|",
39
+ "|<EXTRA_TOKENS_36>|",
40
+ "|<EXTRA_TOKENS_37>|",
41
+ "|<EXTRA_TOKENS_38>|",
42
+ "|<EXTRA_TOKENS_39>|",
43
+ "|<EXTRA_TOKENS_40>|",
44
+ "|<EXTRA_TOKENS_41>|",
45
+ "|<EXTRA_TOKENS_42>|",
46
+ "|<EXTRA_TOKENS_43>|",
47
+ "|<EXTRA_TOKENS_44>|",
48
+ "|<EXTRA_TOKENS_45>|",
49
+ "|<EXTRA_TOKENS_46>|",
50
+ "|<EXTRA_TOKENS_47>|",
51
+ "|<EXTRA_TOKENS_48>|",
52
+ "|<EXTRA_TOKENS_49>|",
53
+ "|<EXTRA_TOKENS_50>|",
54
+ "|<EXTRA_TOKENS_51>|",
55
+ "|<EXTRA_TOKENS_52>|",
56
+ "|<EXTRA_TOKENS_53>|",
57
+ "|<EXTRA_TOKENS_54>|",
58
+ "|<EXTRA_TOKENS_55>|",
59
+ "|<EXTRA_TOKENS_56>|",
60
+ "|<EXTRA_TOKENS_57>|",
61
+ "|<EXTRA_TOKENS_58>|",
62
+ "|<EXTRA_TOKENS_59>|",
63
+ "|<EXTRA_TOKENS_60>|",
64
+ "|<EXTRA_TOKENS_61>|",
65
+ "|<EXTRA_TOKENS_62>|",
66
+ "|<EXTRA_TOKENS_63>|",
67
+ "|<EXTRA_TOKENS_64>|",
68
+ "|<EXTRA_TOKENS_65>|",
69
+ "|<EXTRA_TOKENS_66>|",
70
+ "|<EXTRA_TOKENS_67>|",
71
+ "|<EXTRA_TOKENS_68>|",
72
+ "|<EXTRA_TOKENS_69>|",
73
+ "|<EXTRA_TOKENS_70>|",
74
+ "|<EXTRA_TOKENS_71>|",
75
+ "|<EXTRA_TOKENS_72>|",
76
+ "|<EXTRA_TOKENS_73>|",
77
+ "|<EXTRA_TOKENS_74>|",
78
+ "|<EXTRA_TOKENS_75>|",
79
+ "|<EXTRA_TOKENS_76>|",
80
+ "|<EXTRA_TOKENS_77>|",
81
+ "|<EXTRA_TOKENS_78>|",
82
+ "|<EXTRA_TOKENS_79>|",
83
+ "|<EXTRA_TOKENS_80>|",
84
+ "|<EXTRA_TOKENS_81>|",
85
+ "|<EXTRA_TOKENS_82>|",
86
+ "|<EXTRA_TOKENS_83>|",
87
+ "|<EXTRA_TOKENS_84>|",
88
+ "|<EXTRA_TOKENS_85>|",
89
+ "|<EXTRA_TOKENS_86>|",
90
+ "|<EXTRA_TOKENS_87>|",
91
+ "|<EXTRA_TOKENS_88>|",
92
+ "|<EXTRA_TOKENS_89>|",
93
+ "|<EXTRA_TOKENS_90>|",
94
+ "|<EXTRA_TOKENS_91>|",
95
+ "|<EXTRA_TOKENS_92>|",
96
+ "|<EXTRA_TOKENS_93>|",
97
+ "|<EXTRA_TOKENS_94>|",
98
+ "|<EXTRA_TOKENS_95>|",
99
+ "|<EXTRA_TOKENS_96>|",
100
+ "|<EXTRA_TOKENS_97>|",
101
+ "|<EXTRA_TOKENS_98>|",
102
+ "|<EXTRA_TOKENS_99>|",
103
+ "|<EXTRA_TOKENS_100>|",
104
+ "|<EXTRA_TOKENS_101>|",
105
+ "|<EXTRA_TOKENS_102>|",
106
+ "|<EXTRA_TOKENS_103>|",
107
+ "|<EXTRA_TOKENS_104>|",
108
+ "|<EXTRA_TOKENS_105>|",
109
+ "|<EXTRA_TOKENS_106>|",
110
+ "|<EXTRA_TOKENS_107>|",
111
+ "|<EXTRA_TOKENS_108>|",
112
+ "|<EXTRA_TOKENS_109>|",
113
+ "|<EXTRA_TOKENS_110>|",
114
+ "|<EXTRA_TOKENS_111>|",
115
+ "|<EXTRA_TOKENS_112>|",
116
+ "|<EXTRA_TOKENS_113>|",
117
+ "|<EXTRA_TOKENS_114>|",
118
+ "|<EXTRA_TOKENS_115>|",
119
+ "|<EXTRA_TOKENS_116>|",
120
+ "|<EXTRA_TOKENS_117>|",
121
+ "|<EXTRA_TOKENS_118>|",
122
+ "|<EXTRA_TOKENS_119>|",
123
+ "|<EXTRA_TOKENS_120>|",
124
+ "|<EXTRA_TOKENS_121>|",
125
+ "|<EXTRA_TOKENS_122>|",
126
+ "|<EXTRA_TOKENS_123>|",
127
+ "|<EXTRA_TOKENS_124>|",
128
+ "|<EXTRA_TOKENS_125>|",
129
+ "|<EXTRA_TOKENS_126>|",
130
+ "|<EXTRA_TOKENS_127>|",
131
+ "|<EXTRA_TOKENS_128>|",
132
+ "|<EXTRA_TOKENS_129>|",
133
+ "|<EXTRA_TOKENS_130>|",
134
+ "|<EXTRA_TOKENS_131>|",
135
+ "|<EXTRA_TOKENS_132>|",
136
+ "|<EXTRA_TOKENS_133>|",
137
+ "|<EXTRA_TOKENS_134>|",
138
+ "|<EXTRA_TOKENS_135>|",
139
+ "|<EXTRA_TOKENS_136>|",
140
+ "|<EXTRA_TOKENS_137>|",
141
+ "|<EXTRA_TOKENS_138>|",
142
+ "|<EXTRA_TOKENS_139>|",
143
+ "|<EXTRA_TOKENS_140>|",
144
+ "|<EXTRA_TOKENS_141>|",
145
+ "|<EXTRA_TOKENS_142>|",
146
+ "|<EXTRA_TOKENS_143>|",
147
+ "|<EXTRA_TOKENS_144>|",
148
+ "|<EXTRA_TOKENS_145>|",
149
+ "|<EXTRA_TOKENS_146>|",
150
+ "|<EXTRA_TOKENS_147>|",
151
+ "|<EXTRA_TOKENS_148>|",
152
+ "|<EXTRA_TOKENS_149>|",
153
+ "|<EXTRA_TOKENS_150>|",
154
+ "|<EXTRA_TOKENS_151>|",
155
+ "|<EXTRA_TOKENS_152>|",
156
+ "|<EXTRA_TOKENS_153>|",
157
+ "|<EXTRA_TOKENS_154>|",
158
+ "|<EXTRA_TOKENS_155>|",
159
+ "|<EXTRA_TOKENS_156>|",
160
+ "|<EXTRA_TOKENS_157>|",
161
+ "|<EXTRA_TOKENS_158>|",
162
+ "|<EXTRA_TOKENS_159>|",
163
+ "|<EXTRA_TOKENS_160>|",
164
+ "|<EXTRA_TOKENS_161>|",
165
+ "|<EXTRA_TOKENS_162>|",
166
+ "|<EXTRA_TOKENS_163>|",
167
+ "|<EXTRA_TOKENS_164>|",
168
+ "|<EXTRA_TOKENS_165>|",
169
+ "|<EXTRA_TOKENS_166>|",
170
+ "|<EXTRA_TOKENS_167>|",
171
+ "|<EXTRA_TOKENS_168>|",
172
+ "|<EXTRA_TOKENS_169>|",
173
+ "|<EXTRA_TOKENS_170>|",
174
+ "|<EXTRA_TOKENS_171>|",
175
+ "|<EXTRA_TOKENS_172>|",
176
+ "|<EXTRA_TOKENS_173>|",
177
+ "|<EXTRA_TOKENS_174>|",
178
+ "|<EXTRA_TOKENS_175>|",
179
+ "|<EXTRA_TOKENS_176>|",
180
+ "|<EXTRA_TOKENS_177>|",
181
+ "|<EXTRA_TOKENS_178>|",
182
+ "|<EXTRA_TOKENS_179>|",
183
+ "|<EXTRA_TOKENS_180>|",
184
+ "|<EXTRA_TOKENS_181>|",
185
+ "|<EXTRA_TOKENS_182>|",
186
+ "|<EXTRA_TOKENS_183>|",
187
+ "|<EXTRA_TOKENS_184>|",
188
+ "|<EXTRA_TOKENS_185>|",
189
+ "|<EXTRA_TOKENS_186>|",
190
+ "|<EXTRA_TOKENS_187>|",
191
+ "|<EXTRA_TOKENS_188>|",
192
+ "|<EXTRA_TOKENS_189>|",
193
+ "|<EXTRA_TOKENS_190>|",
194
+ "|<EXTRA_TOKENS_191>|",
195
+ "|<EXTRA_TOKENS_192>|",
196
+ "|<EXTRA_TOKENS_193>|",
197
+ "|<EXTRA_TOKENS_194>|",
198
+ "|<EXTRA_TOKENS_195>|",
199
+ "|<EXTRA_TOKENS_196>|",
200
+ "|<EXTRA_TOKENS_197>|",
201
+ "|<EXTRA_TOKENS_198>|",
202
+ "|<EXTRA_TOKENS_199>|",
203
+ "|<EXTRA_TOKENS_200>|",
204
+ "|<EXTRA_TOKENS_201>|",
205
+ "|<EXTRA_TOKENS_202>|",
206
+ "|<EXTRA_TOKENS_203>|",
207
+ "|<EXTRA_TOKENS_204>|",
208
+ "|<EXTRA_TOKENS_205>|",
209
+ "|<EXTRA_TOKENS_206>|",
210
+ "|<EXTRA_TOKENS_207>|",
211
+ "|<EXTRA_TOKENS_208>|",
212
+ "|<EXTRA_TOKENS_209>|",
213
+ "|<EXTRA_TOKENS_210>|",
214
+ "|<EXTRA_TOKENS_211>|",
215
+ "|<EXTRA_TOKENS_212>|",
216
+ "|<EXTRA_TOKENS_213>|",
217
+ "|<EXTRA_TOKENS_214>|",
218
+ "|<EXTRA_TOKENS_215>|",
219
+ "|<EXTRA_TOKENS_216>|",
220
+ "|<EXTRA_TOKENS_217>|",
221
+ "|<EXTRA_TOKENS_218>|",
222
+ "|<EXTRA_TOKENS_219>|",
223
+ "|<EXTRA_TOKENS_220>|",
224
+ "|<EXTRA_TOKENS_221>|",
225
+ "|<EXTRA_TOKENS_222>|",
226
+ "|<EXTRA_TOKENS_223>|",
227
+ "|<EXTRA_TOKENS_224>|",
228
+ "|<EXTRA_TOKENS_225>|",
229
+ "|<EXTRA_TOKENS_226>|",
230
+ "|<EXTRA_TOKENS_227>|",
231
+ "|<EXTRA_TOKENS_228>|",
232
+ "|<EXTRA_TOKENS_229>|",
233
+ "|<EXTRA_TOKENS_230>|",
234
+ "|<EXTRA_TOKENS_231>|",
235
+ "|<EXTRA_TOKENS_232>|",
236
+ "|<EXTRA_TOKENS_233>|",
237
+ "|<EXTRA_TOKENS_234>|",
238
+ "|<EXTRA_TOKENS_235>|",
239
+ "|<EXTRA_TOKENS_236>|",
240
+ "|<EXTRA_TOKENS_237>|",
241
+ "|<EXTRA_TOKENS_238>|",
242
+ "|<EXTRA_TOKENS_239>|",
243
+ "|<EXTRA_TOKENS_240>|",
244
+ "|<EXTRA_TOKENS_241>|",
245
+ "|<EXTRA_TOKENS_242>|",
246
+ "|<EXTRA_TOKENS_243>|",
247
+ "|<EXTRA_TOKENS_244>|",
248
+ "|<EXTRA_TOKENS_245>|",
249
+ "|<EXTRA_TOKENS_246>|",
250
+ "|<EXTRA_TOKENS_247>|",
251
+ "|<EXTRA_TOKENS_248>|",
252
+ "|<EXTRA_TOKENS_249>|",
253
+ "|<EXTRA_TOKENS_250>|",
254
+ "|<EXTRA_TOKENS_251>|",
255
+ "|<EXTRA_TOKENS_252>|",
256
+ "|<EXTRA_TOKENS_253>|",
257
+ "|<EXTRA_TOKENS_254>|",
258
+ "|<EXTRA_TOKENS_255>|",
259
+ "|<EXTRA_TOKENS_256>|",
260
+ "|<EXTRA_TOKENS_257>|",
261
+ "|<EXTRA_TOKENS_258>|",
262
+ "|<EXTRA_TOKENS_259>|",
263
+ "|<EXTRA_TOKENS_260>|",
264
+ "|<EXTRA_TOKENS_261>|",
265
+ "|<EXTRA_TOKENS_262>|",
266
+ "|<EXTRA_TOKENS_263>|",
267
+ "|<EXTRA_TOKENS_264>|",
268
+ "|<EXTRA_TOKENS_265>|",
269
+ "|<EXTRA_TOKENS_266>|",
270
+ "|<EXTRA_TOKENS_267>|",
271
+ "|<EXTRA_TOKENS_268>|",
272
+ "|<EXTRA_TOKENS_269>|",
273
+ "|<EXTRA_TOKENS_270>|",
274
+ "|<EXTRA_TOKENS_271>|",
275
+ "|<EXTRA_TOKENS_272>|",
276
+ "|<EXTRA_TOKENS_273>|",
277
+ "|<EXTRA_TOKENS_274>|",
278
+ "|<EXTRA_TOKENS_275>|",
279
+ "|<EXTRA_TOKENS_276>|",
280
+ "|<EXTRA_TOKENS_277>|",
281
+ "|<EXTRA_TOKENS_278>|",
282
+ "|<EXTRA_TOKENS_279>|",
283
+ "|<EXTRA_TOKENS_280>|",
284
+ "|<EXTRA_TOKENS_281>|",
285
+ "|<EXTRA_TOKENS_282>|",
286
+ "|<EXTRA_TOKENS_283>|",
287
+ "|<EXTRA_TOKENS_284>|",
288
+ "|<EXTRA_TOKENS_285>|",
289
+ "|<EXTRA_TOKENS_286>|",
290
+ "|<EXTRA_TOKENS_287>|",
291
+ "|<EXTRA_TOKENS_288>|",
292
+ "|<EXTRA_TOKENS_289>|",
293
+ "|<EXTRA_TOKENS_290>|",
294
+ "|<EXTRA_TOKENS_291>|",
295
+ "|<EXTRA_TOKENS_292>|",
296
+ "|<EXTRA_TOKENS_293>|",
297
+ "|<EXTRA_TOKENS_294>|",
298
+ "|<EXTRA_TOKENS_295>|",
299
+ "|<EXTRA_TOKENS_296>|",
300
+ "|<EXTRA_TOKENS_297>|",
301
+ "|<EXTRA_TOKENS_298>|",
302
+ "|<EXTRA_TOKENS_299>|",
303
+ "|<EXTRA_TOKENS_300>|",
304
+ "|<EXTRA_TOKENS_301>|",
305
+ "|<EXTRA_TOKENS_302>|",
306
+ "|<EXTRA_TOKENS_303>|",
307
+ "|<EXTRA_TOKENS_304>|",
308
+ "|<EXTRA_TOKENS_305>|",
309
+ "|<EXTRA_TOKENS_306>|",
310
+ "|<EXTRA_TOKENS_307>|",
311
+ "|<EXTRA_TOKENS_308>|",
312
+ "|<EXTRA_TOKENS_309>|",
313
+ "|<EXTRA_TOKENS_310>|",
314
+ "|<EXTRA_TOKENS_311>|",
315
+ "|<EXTRA_TOKENS_312>|",
316
+ "|<EXTRA_TOKENS_313>|",
317
+ "|<EXTRA_TOKENS_314>|",
318
+ "|<EXTRA_TOKENS_315>|",
319
+ "|<EXTRA_TOKENS_316>|",
320
+ "|<EXTRA_TOKENS_317>|",
321
+ "|<EXTRA_TOKENS_318>|",
322
+ "|<EXTRA_TOKENS_319>|",
323
+ "|<EXTRA_TOKENS_320>|",
324
+ "|<EXTRA_TOKENS_321>|",
325
+ "|<EXTRA_TOKENS_322>|",
326
+ "|<EXTRA_TOKENS_323>|",
327
+ "|<EXTRA_TOKENS_324>|",
328
+ "|<EXTRA_TOKENS_325>|",
329
+ "|<EXTRA_TOKENS_326>|",
330
+ "|<EXTRA_TOKENS_327>|",
331
+ "|<EXTRA_TOKENS_328>|",
332
+ "|<EXTRA_TOKENS_329>|",
333
+ "|<EXTRA_TOKENS_330>|",
334
+ "|<EXTRA_TOKENS_331>|",
335
+ "|<EXTRA_TOKENS_332>|",
336
+ "|<EXTRA_TOKENS_333>|",
337
+ "|<EXTRA_TOKENS_334>|",
338
+ "|<EXTRA_TOKENS_335>|",
339
+ "|<EXTRA_TOKENS_336>|",
340
+ "|<EXTRA_TOKENS_337>|",
341
+ "|<EXTRA_TOKENS_338>|",
342
+ "|<EXTRA_TOKENS_339>|",
343
+ "|<EXTRA_TOKENS_340>|",
344
+ "|<EXTRA_TOKENS_341>|",
345
+ "|<EXTRA_TOKENS_342>|",
346
+ "|<EXTRA_TOKENS_343>|",
347
+ "|<EXTRA_TOKENS_344>|",
348
+ "|<EXTRA_TOKENS_345>|",
349
+ "|<EXTRA_TOKENS_346>|",
350
+ "|<EXTRA_TOKENS_347>|",
351
+ "|<EXTRA_TOKENS_348>|",
352
+ "|<EXTRA_TOKENS_349>|",
353
+ "|<EXTRA_TOKENS_350>|",
354
+ "|<EXTRA_TOKENS_351>|",
355
+ "|<EXTRA_TOKENS_352>|",
356
+ "|<EXTRA_TOKENS_353>|",
357
+ "|<EXTRA_TOKENS_354>|",
358
+ "|<EXTRA_TOKENS_355>|",
359
+ "|<EXTRA_TOKENS_356>|",
360
+ "|<EXTRA_TOKENS_357>|",
361
+ "|<EXTRA_TOKENS_358>|",
362
+ "|<EXTRA_TOKENS_359>|",
363
+ "|<EXTRA_TOKENS_360>|",
364
+ "|<EXTRA_TOKENS_361>|",
365
+ "|<EXTRA_TOKENS_362>|",
366
+ "|<EXTRA_TOKENS_363>|",
367
+ "|<EXTRA_TOKENS_364>|",
368
+ "|<EXTRA_TOKENS_365>|",
369
+ "|<EXTRA_TOKENS_366>|",
370
+ "|<EXTRA_TOKENS_367>|",
371
+ "|<EXTRA_TOKENS_368>|",
372
+ "|<EXTRA_TOKENS_369>|",
373
+ "|<EXTRA_TOKENS_370>|",
374
+ "|<EXTRA_TOKENS_371>|",
375
+ "|<EXTRA_TOKENS_372>|",
376
+ "|<EXTRA_TOKENS_373>|",
377
+ "|<EXTRA_TOKENS_374>|",
378
+ "|<EXTRA_TOKENS_375>|",
379
+ "|<EXTRA_TOKENS_376>|",
380
+ "|<EXTRA_TOKENS_377>|",
381
+ "|<EXTRA_TOKENS_378>|",
382
+ "|<EXTRA_TOKENS_379>|",
383
+ "|<EXTRA_TOKENS_380>|",
384
+ "|<EXTRA_TOKENS_381>|",
385
+ "|<EXTRA_TOKENS_382>|",
386
+ "|<EXTRA_TOKENS_383>|",
387
+ "|<EXTRA_TOKENS_384>|",
388
+ "|<EXTRA_TOKENS_385>|",
389
+ "|<EXTRA_TOKENS_386>|",
390
+ "|<EXTRA_TOKENS_387>|",
391
+ "|<EXTRA_TOKENS_388>|",
392
+ "|<EXTRA_TOKENS_389>|",
393
+ "|<EXTRA_TOKENS_390>|",
394
+ "|<EXTRA_TOKENS_391>|",
395
+ "|<EXTRA_TOKENS_392>|",
396
+ "|<EXTRA_TOKENS_393>|",
397
+ "|<EXTRA_TOKENS_394>|",
398
+ "|<EXTRA_TOKENS_395>|",
399
+ "|<EXTRA_TOKENS_396>|",
400
+ "|<EXTRA_TOKENS_397>|",
401
+ "|<EXTRA_TOKENS_398>|",
402
+ "|<EXTRA_TOKENS_399>|",
403
+ "|<EXTRA_TOKENS_400>|",
404
+ "|<EXTRA_TOKENS_401>|",
405
+ "|<EXTRA_TOKENS_402>|",
406
+ "|<EXTRA_TOKENS_403>|",
407
+ "|<EXTRA_TOKENS_404>|",
408
+ "|<EXTRA_TOKENS_405>|",
409
+ "|<EXTRA_TOKENS_406>|",
410
+ "|<EXTRA_TOKENS_407>|",
411
+ "|<EXTRA_TOKENS_408>|",
412
+ "|<EXTRA_TOKENS_409>|",
413
+ "|<EXTRA_TOKENS_410>|",
414
+ "|<EXTRA_TOKENS_411>|",
415
+ "|<EXTRA_TOKENS_412>|",
416
+ "|<EXTRA_TOKENS_413>|",
417
+ "|<EXTRA_TOKENS_414>|",
418
+ "|<EXTRA_TOKENS_415>|",
419
+ "|<EXTRA_TOKENS_416>|",
420
+ "|<EXTRA_TOKENS_417>|",
421
+ "<im_start>",
422
+ "<im_end>",
423
+ "<im_patch>",
424
+ "<im_col>",
425
+ "<|image|>"
426
+ ],
427
+ "eos_token": {
428
+ "content": "<|endoftext|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false
433
+ },
434
+ "pad_token": {
435
+ "content": "<|endoftext|>",
436
+ "lstrip": false,
437
+ "normalized": false,
438
+ "rstrip": false,
439
+ "single_word": false
440
+ }
441
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,3853 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "|<EXTRA_TOKENS_0>|",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "|<EXTRA_TOKENS_1>|",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "|<EXTRA_TOKENS_2>|",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "|<EXTRA_TOKENS_3>|",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "|<EXTRA_TOKENS_4>|",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "|<EXTRA_TOKENS_5>|",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "|<EXTRA_TOKENS_6>|",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "|<EXTRA_TOKENS_7>|",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "|<EXTRA_TOKENS_8>|",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "|<EXTRA_TOKENS_9>|",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "|<EXTRA_TOKENS_10>|",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "151657": {
117
+ "content": "|<EXTRA_TOKENS_11>|",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "151658": {
125
+ "content": "|<EXTRA_TOKENS_12>|",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "151659": {
133
+ "content": "|<EXTRA_TOKENS_13>|",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "151660": {
141
+ "content": "|<EXTRA_TOKENS_14>|",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "151661": {
149
+ "content": "|<EXTRA_TOKENS_15>|",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "151662": {
157
+ "content": "|<EXTRA_TOKENS_16>|",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "151663": {
165
+ "content": "|<EXTRA_TOKENS_17>|",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "151664": {
173
+ "content": "|<EXTRA_TOKENS_18>|",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "151665": {
181
+ "content": "|<EXTRA_TOKENS_19>|",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "151666": {
189
+ "content": "|<EXTRA_TOKENS_20>|",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "151667": {
197
+ "content": "|<EXTRA_TOKENS_21>|",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "151668": {
205
+ "content": "|<EXTRA_TOKENS_22>|",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "151669": {
213
+ "content": "|<EXTRA_TOKENS_23>|",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "151670": {
221
+ "content": "|<EXTRA_TOKENS_24>|",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "151671": {
229
+ "content": "|<EXTRA_TOKENS_25>|",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "151672": {
237
+ "content": "|<EXTRA_TOKENS_26>|",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "151673": {
245
+ "content": "|<EXTRA_TOKENS_27>|",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "151674": {
253
+ "content": "|<EXTRA_TOKENS_28>|",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "151675": {
261
+ "content": "|<EXTRA_TOKENS_29>|",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "151676": {
269
+ "content": "|<EXTRA_TOKENS_30>|",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "151677": {
277
+ "content": "|<EXTRA_TOKENS_31>|",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "151678": {
285
+ "content": "|<EXTRA_TOKENS_32>|",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "151679": {
293
+ "content": "|<EXTRA_TOKENS_33>|",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "151680": {
301
+ "content": "|<EXTRA_TOKENS_34>|",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "151681": {
309
+ "content": "|<EXTRA_TOKENS_35>|",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "151682": {
317
+ "content": "|<EXTRA_TOKENS_36>|",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "151683": {
325
+ "content": "|<EXTRA_TOKENS_37>|",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "151684": {
333
+ "content": "|<EXTRA_TOKENS_38>|",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "151685": {
341
+ "content": "|<EXTRA_TOKENS_39>|",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "151686": {
349
+ "content": "|<EXTRA_TOKENS_40>|",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "151687": {
357
+ "content": "|<EXTRA_TOKENS_41>|",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "151688": {
365
+ "content": "|<EXTRA_TOKENS_42>|",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "151689": {
373
+ "content": "|<EXTRA_TOKENS_43>|",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "151690": {
381
+ "content": "|<EXTRA_TOKENS_44>|",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "151691": {
389
+ "content": "|<EXTRA_TOKENS_45>|",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "151692": {
397
+ "content": "|<EXTRA_TOKENS_46>|",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "151693": {
405
+ "content": "|<EXTRA_TOKENS_47>|",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "151694": {
413
+ "content": "|<EXTRA_TOKENS_48>|",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "151695": {
421
+ "content": "|<EXTRA_TOKENS_49>|",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "151696": {
429
+ "content": "|<EXTRA_TOKENS_50>|",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "151697": {
437
+ "content": "|<EXTRA_TOKENS_51>|",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "151698": {
445
+ "content": "|<EXTRA_TOKENS_52>|",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "151699": {
453
+ "content": "|<EXTRA_TOKENS_53>|",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "151700": {
461
+ "content": "|<EXTRA_TOKENS_54>|",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "151701": {
469
+ "content": "|<EXTRA_TOKENS_55>|",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "151702": {
477
+ "content": "|<EXTRA_TOKENS_56>|",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "151703": {
485
+ "content": "|<EXTRA_TOKENS_57>|",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "151704": {
493
+ "content": "|<EXTRA_TOKENS_58>|",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "151705": {
501
+ "content": "|<EXTRA_TOKENS_59>|",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "151706": {
509
+ "content": "|<EXTRA_TOKENS_60>|",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "151707": {
517
+ "content": "|<EXTRA_TOKENS_61>|",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "151708": {
525
+ "content": "|<EXTRA_TOKENS_62>|",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "151709": {
533
+ "content": "|<EXTRA_TOKENS_63>|",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "151710": {
541
+ "content": "|<EXTRA_TOKENS_64>|",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "151711": {
549
+ "content": "|<EXTRA_TOKENS_65>|",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "151712": {
557
+ "content": "|<EXTRA_TOKENS_66>|",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "151713": {
565
+ "content": "|<EXTRA_TOKENS_67>|",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "151714": {
573
+ "content": "|<EXTRA_TOKENS_68>|",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "151715": {
581
+ "content": "|<EXTRA_TOKENS_69>|",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "151716": {
589
+ "content": "|<EXTRA_TOKENS_70>|",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "151717": {
597
+ "content": "|<EXTRA_TOKENS_71>|",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "151718": {
605
+ "content": "|<EXTRA_TOKENS_72>|",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "151719": {
613
+ "content": "|<EXTRA_TOKENS_73>|",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "151720": {
621
+ "content": "|<EXTRA_TOKENS_74>|",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "151721": {
629
+ "content": "|<EXTRA_TOKENS_75>|",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "151722": {
637
+ "content": "|<EXTRA_TOKENS_76>|",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "151723": {
645
+ "content": "|<EXTRA_TOKENS_77>|",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "151724": {
653
+ "content": "|<EXTRA_TOKENS_78>|",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "151725": {
661
+ "content": "|<EXTRA_TOKENS_79>|",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "151726": {
669
+ "content": "|<EXTRA_TOKENS_80>|",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "151727": {
677
+ "content": "|<EXTRA_TOKENS_81>|",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "151728": {
685
+ "content": "|<EXTRA_TOKENS_82>|",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "151729": {
693
+ "content": "|<EXTRA_TOKENS_83>|",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "151730": {
701
+ "content": "|<EXTRA_TOKENS_84>|",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "151731": {
709
+ "content": "|<EXTRA_TOKENS_85>|",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "151732": {
717
+ "content": "|<EXTRA_TOKENS_86>|",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "151733": {
725
+ "content": "|<EXTRA_TOKENS_87>|",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "151734": {
733
+ "content": "|<EXTRA_TOKENS_88>|",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "151735": {
741
+ "content": "|<EXTRA_TOKENS_89>|",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "151736": {
749
+ "content": "|<EXTRA_TOKENS_90>|",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "151737": {
757
+ "content": "|<EXTRA_TOKENS_91>|",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "151738": {
765
+ "content": "|<EXTRA_TOKENS_92>|",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "151739": {
773
+ "content": "|<EXTRA_TOKENS_93>|",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "151740": {
781
+ "content": "|<EXTRA_TOKENS_94>|",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "151741": {
789
+ "content": "|<EXTRA_TOKENS_95>|",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "151742": {
797
+ "content": "|<EXTRA_TOKENS_96>|",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "151743": {
805
+ "content": "|<EXTRA_TOKENS_97>|",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "151744": {
813
+ "content": "|<EXTRA_TOKENS_98>|",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "151745": {
821
+ "content": "|<EXTRA_TOKENS_99>|",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "151746": {
829
+ "content": "|<EXTRA_TOKENS_100>|",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "151747": {
837
+ "content": "|<EXTRA_TOKENS_101>|",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "151748": {
845
+ "content": "|<EXTRA_TOKENS_102>|",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "151749": {
853
+ "content": "|<EXTRA_TOKENS_103>|",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "151750": {
861
+ "content": "|<EXTRA_TOKENS_104>|",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "151751": {
869
+ "content": "|<EXTRA_TOKENS_105>|",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "151752": {
877
+ "content": "|<EXTRA_TOKENS_106>|",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "151753": {
885
+ "content": "|<EXTRA_TOKENS_107>|",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "151754": {
893
+ "content": "|<EXTRA_TOKENS_108>|",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "151755": {
901
+ "content": "|<EXTRA_TOKENS_109>|",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "151756": {
909
+ "content": "|<EXTRA_TOKENS_110>|",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "151757": {
917
+ "content": "|<EXTRA_TOKENS_111>|",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "151758": {
925
+ "content": "|<EXTRA_TOKENS_112>|",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "151759": {
933
+ "content": "|<EXTRA_TOKENS_113>|",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "151760": {
941
+ "content": "|<EXTRA_TOKENS_114>|",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "151761": {
949
+ "content": "|<EXTRA_TOKENS_115>|",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "151762": {
957
+ "content": "|<EXTRA_TOKENS_116>|",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "151763": {
965
+ "content": "|<EXTRA_TOKENS_117>|",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "151764": {
973
+ "content": "|<EXTRA_TOKENS_118>|",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "151765": {
981
+ "content": "|<EXTRA_TOKENS_119>|",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "151766": {
989
+ "content": "|<EXTRA_TOKENS_120>|",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "151767": {
997
+ "content": "|<EXTRA_TOKENS_121>|",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "151768": {
1005
+ "content": "|<EXTRA_TOKENS_122>|",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "151769": {
1013
+ "content": "|<EXTRA_TOKENS_123>|",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "151770": {
1021
+ "content": "|<EXTRA_TOKENS_124>|",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "151771": {
1029
+ "content": "|<EXTRA_TOKENS_125>|",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "151772": {
1037
+ "content": "|<EXTRA_TOKENS_126>|",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "151773": {
1045
+ "content": "|<EXTRA_TOKENS_127>|",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "151774": {
1053
+ "content": "|<EXTRA_TOKENS_128>|",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "151775": {
1061
+ "content": "|<EXTRA_TOKENS_129>|",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "151776": {
1069
+ "content": "|<EXTRA_TOKENS_130>|",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "151777": {
1077
+ "content": "|<EXTRA_TOKENS_131>|",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "151778": {
1085
+ "content": "|<EXTRA_TOKENS_132>|",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "151779": {
1093
+ "content": "|<EXTRA_TOKENS_133>|",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "151780": {
1101
+ "content": "|<EXTRA_TOKENS_134>|",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "151781": {
1109
+ "content": "|<EXTRA_TOKENS_135>|",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "151782": {
1117
+ "content": "|<EXTRA_TOKENS_136>|",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "151783": {
1125
+ "content": "|<EXTRA_TOKENS_137>|",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "151784": {
1133
+ "content": "|<EXTRA_TOKENS_138>|",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "151785": {
1141
+ "content": "|<EXTRA_TOKENS_139>|",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "151786": {
1149
+ "content": "|<EXTRA_TOKENS_140>|",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "151787": {
1157
+ "content": "|<EXTRA_TOKENS_141>|",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "151788": {
1165
+ "content": "|<EXTRA_TOKENS_142>|",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "151789": {
1173
+ "content": "|<EXTRA_TOKENS_143>|",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "151790": {
1181
+ "content": "|<EXTRA_TOKENS_144>|",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "151791": {
1189
+ "content": "|<EXTRA_TOKENS_145>|",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "151792": {
1197
+ "content": "|<EXTRA_TOKENS_146>|",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "151793": {
1205
+ "content": "|<EXTRA_TOKENS_147>|",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "151794": {
1213
+ "content": "|<EXTRA_TOKENS_148>|",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "151795": {
1221
+ "content": "|<EXTRA_TOKENS_149>|",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "151796": {
1229
+ "content": "|<EXTRA_TOKENS_150>|",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "151797": {
1237
+ "content": "|<EXTRA_TOKENS_151>|",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "151798": {
1245
+ "content": "|<EXTRA_TOKENS_152>|",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "151799": {
1253
+ "content": "|<EXTRA_TOKENS_153>|",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "151800": {
1261
+ "content": "|<EXTRA_TOKENS_154>|",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "151801": {
1269
+ "content": "|<EXTRA_TOKENS_155>|",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "151802": {
1277
+ "content": "|<EXTRA_TOKENS_156>|",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "151803": {
1285
+ "content": "|<EXTRA_TOKENS_157>|",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "151804": {
1293
+ "content": "|<EXTRA_TOKENS_158>|",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "151805": {
1301
+ "content": "|<EXTRA_TOKENS_159>|",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "151806": {
1309
+ "content": "|<EXTRA_TOKENS_160>|",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "151807": {
1317
+ "content": "|<EXTRA_TOKENS_161>|",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "151808": {
1325
+ "content": "|<EXTRA_TOKENS_162>|",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "151809": {
1333
+ "content": "|<EXTRA_TOKENS_163>|",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "151810": {
1341
+ "content": "|<EXTRA_TOKENS_164>|",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "151811": {
1349
+ "content": "|<EXTRA_TOKENS_165>|",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "151812": {
1357
+ "content": "|<EXTRA_TOKENS_166>|",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "151813": {
1365
+ "content": "|<EXTRA_TOKENS_167>|",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "151814": {
1373
+ "content": "|<EXTRA_TOKENS_168>|",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "151815": {
1381
+ "content": "|<EXTRA_TOKENS_169>|",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "151816": {
1389
+ "content": "|<EXTRA_TOKENS_170>|",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "151817": {
1397
+ "content": "|<EXTRA_TOKENS_171>|",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "151818": {
1405
+ "content": "|<EXTRA_TOKENS_172>|",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "151819": {
1413
+ "content": "|<EXTRA_TOKENS_173>|",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "151820": {
1421
+ "content": "|<EXTRA_TOKENS_174>|",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "151821": {
1429
+ "content": "|<EXTRA_TOKENS_175>|",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "151822": {
1437
+ "content": "|<EXTRA_TOKENS_176>|",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "151823": {
1445
+ "content": "|<EXTRA_TOKENS_177>|",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "151824": {
1453
+ "content": "|<EXTRA_TOKENS_178>|",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "151825": {
1461
+ "content": "|<EXTRA_TOKENS_179>|",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "151826": {
1469
+ "content": "|<EXTRA_TOKENS_180>|",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "151827": {
1477
+ "content": "|<EXTRA_TOKENS_181>|",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "151828": {
1485
+ "content": "|<EXTRA_TOKENS_182>|",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "151829": {
1493
+ "content": "|<EXTRA_TOKENS_183>|",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "151830": {
1501
+ "content": "|<EXTRA_TOKENS_184>|",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "151831": {
1509
+ "content": "|<EXTRA_TOKENS_185>|",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "151832": {
1517
+ "content": "|<EXTRA_TOKENS_186>|",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "151833": {
1525
+ "content": "|<EXTRA_TOKENS_187>|",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "151834": {
1533
+ "content": "|<EXTRA_TOKENS_188>|",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "151835": {
1541
+ "content": "|<EXTRA_TOKENS_189>|",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "151836": {
1549
+ "content": "|<EXTRA_TOKENS_190>|",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "151837": {
1557
+ "content": "|<EXTRA_TOKENS_191>|",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "151838": {
1565
+ "content": "|<EXTRA_TOKENS_192>|",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "151839": {
1573
+ "content": "|<EXTRA_TOKENS_193>|",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "151840": {
1581
+ "content": "|<EXTRA_TOKENS_194>|",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "151841": {
1589
+ "content": "|<EXTRA_TOKENS_195>|",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "151842": {
1597
+ "content": "|<EXTRA_TOKENS_196>|",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "151843": {
1605
+ "content": "|<EXTRA_TOKENS_197>|",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "151844": {
1613
+ "content": "|<EXTRA_TOKENS_198>|",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "151845": {
1621
+ "content": "|<EXTRA_TOKENS_199>|",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "151846": {
1629
+ "content": "|<EXTRA_TOKENS_200>|",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "151847": {
1637
+ "content": "|<EXTRA_TOKENS_201>|",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "151848": {
1645
+ "content": "|<EXTRA_TOKENS_202>|",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "151849": {
1653
+ "content": "|<EXTRA_TOKENS_203>|",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "151850": {
1661
+ "content": "|<EXTRA_TOKENS_204>|",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "151851": {
1669
+ "content": "|<EXTRA_TOKENS_205>|",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "151852": {
1677
+ "content": "|<EXTRA_TOKENS_206>|",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "151853": {
1685
+ "content": "|<EXTRA_TOKENS_207>|",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "151854": {
1693
+ "content": "|<EXTRA_TOKENS_208>|",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "151855": {
1701
+ "content": "|<EXTRA_TOKENS_209>|",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "151856": {
1709
+ "content": "|<EXTRA_TOKENS_210>|",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "151857": {
1717
+ "content": "|<EXTRA_TOKENS_211>|",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "151858": {
1725
+ "content": "|<EXTRA_TOKENS_212>|",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "151859": {
1733
+ "content": "|<EXTRA_TOKENS_213>|",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "151860": {
1741
+ "content": "|<EXTRA_TOKENS_214>|",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "151861": {
1749
+ "content": "|<EXTRA_TOKENS_215>|",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "151862": {
1757
+ "content": "|<EXTRA_TOKENS_216>|",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "151863": {
1765
+ "content": "|<EXTRA_TOKENS_217>|",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "151864": {
1773
+ "content": "|<EXTRA_TOKENS_218>|",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "151865": {
1781
+ "content": "|<EXTRA_TOKENS_219>|",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "151866": {
1789
+ "content": "|<EXTRA_TOKENS_220>|",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "151867": {
1797
+ "content": "|<EXTRA_TOKENS_221>|",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "151868": {
1805
+ "content": "|<EXTRA_TOKENS_222>|",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "151869": {
1813
+ "content": "|<EXTRA_TOKENS_223>|",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "151870": {
1821
+ "content": "|<EXTRA_TOKENS_224>|",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "151871": {
1829
+ "content": "|<EXTRA_TOKENS_225>|",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "151872": {
1837
+ "content": "|<EXTRA_TOKENS_226>|",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "151873": {
1845
+ "content": "|<EXTRA_TOKENS_227>|",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "151874": {
1853
+ "content": "|<EXTRA_TOKENS_228>|",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "151875": {
1861
+ "content": "|<EXTRA_TOKENS_229>|",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "151876": {
1869
+ "content": "|<EXTRA_TOKENS_230>|",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "151877": {
1877
+ "content": "|<EXTRA_TOKENS_231>|",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "151878": {
1885
+ "content": "|<EXTRA_TOKENS_232>|",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "151879": {
1893
+ "content": "|<EXTRA_TOKENS_233>|",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "151880": {
1901
+ "content": "|<EXTRA_TOKENS_234>|",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "151881": {
1909
+ "content": "|<EXTRA_TOKENS_235>|",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "151882": {
1917
+ "content": "|<EXTRA_TOKENS_236>|",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "151883": {
1925
+ "content": "|<EXTRA_TOKENS_237>|",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "151884": {
1933
+ "content": "|<EXTRA_TOKENS_238>|",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "151885": {
1941
+ "content": "|<EXTRA_TOKENS_239>|",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "151886": {
1949
+ "content": "|<EXTRA_TOKENS_240>|",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "151887": {
1957
+ "content": "|<EXTRA_TOKENS_241>|",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "151888": {
1965
+ "content": "|<EXTRA_TOKENS_242>|",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "151889": {
1973
+ "content": "|<EXTRA_TOKENS_243>|",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "151890": {
1981
+ "content": "|<EXTRA_TOKENS_244>|",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "151891": {
1989
+ "content": "|<EXTRA_TOKENS_245>|",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "151892": {
1997
+ "content": "|<EXTRA_TOKENS_246>|",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "151893": {
2005
+ "content": "|<EXTRA_TOKENS_247>|",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "151894": {
2013
+ "content": "|<EXTRA_TOKENS_248>|",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "151895": {
2021
+ "content": "|<EXTRA_TOKENS_249>|",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "151896": {
2029
+ "content": "|<EXTRA_TOKENS_250>|",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "151897": {
2037
+ "content": "|<EXTRA_TOKENS_251>|",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "151898": {
2045
+ "content": "|<EXTRA_TOKENS_252>|",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ },
2052
+ "151899": {
2053
+ "content": "|<EXTRA_TOKENS_253>|",
2054
+ "lstrip": false,
2055
+ "normalized": false,
2056
+ "rstrip": false,
2057
+ "single_word": false,
2058
+ "special": true
2059
+ },
2060
+ "151900": {
2061
+ "content": "|<EXTRA_TOKENS_254>|",
2062
+ "lstrip": false,
2063
+ "normalized": false,
2064
+ "rstrip": false,
2065
+ "single_word": false,
2066
+ "special": true
2067
+ },
2068
+ "151901": {
2069
+ "content": "|<EXTRA_TOKENS_255>|",
2070
+ "lstrip": false,
2071
+ "normalized": false,
2072
+ "rstrip": false,
2073
+ "single_word": false,
2074
+ "special": true
2075
+ },
2076
+ "151902": {
2077
+ "content": "|<EXTRA_TOKENS_256>|",
2078
+ "lstrip": false,
2079
+ "normalized": false,
2080
+ "rstrip": false,
2081
+ "single_word": false,
2082
+ "special": true
2083
+ },
2084
+ "151903": {
2085
+ "content": "|<EXTRA_TOKENS_257>|",
2086
+ "lstrip": false,
2087
+ "normalized": false,
2088
+ "rstrip": false,
2089
+ "single_word": false,
2090
+ "special": true
2091
+ },
2092
+ "151904": {
2093
+ "content": "|<EXTRA_TOKENS_258>|",
2094
+ "lstrip": false,
2095
+ "normalized": false,
2096
+ "rstrip": false,
2097
+ "single_word": false,
2098
+ "special": true
2099
+ },
2100
+ "151905": {
2101
+ "content": "|<EXTRA_TOKENS_259>|",
2102
+ "lstrip": false,
2103
+ "normalized": false,
2104
+ "rstrip": false,
2105
+ "single_word": false,
2106
+ "special": true
2107
+ },
2108
+ "151906": {
2109
+ "content": "|<EXTRA_TOKENS_260>|",
2110
+ "lstrip": false,
2111
+ "normalized": false,
2112
+ "rstrip": false,
2113
+ "single_word": false,
2114
+ "special": true
2115
+ },
2116
+ "151907": {
2117
+ "content": "|<EXTRA_TOKENS_261>|",
2118
+ "lstrip": false,
2119
+ "normalized": false,
2120
+ "rstrip": false,
2121
+ "single_word": false,
2122
+ "special": true
2123
+ },
2124
+ "151908": {
2125
+ "content": "|<EXTRA_TOKENS_262>|",
2126
+ "lstrip": false,
2127
+ "normalized": false,
2128
+ "rstrip": false,
2129
+ "single_word": false,
2130
+ "special": true
2131
+ },
2132
+ "151909": {
2133
+ "content": "|<EXTRA_TOKENS_263>|",
2134
+ "lstrip": false,
2135
+ "normalized": false,
2136
+ "rstrip": false,
2137
+ "single_word": false,
2138
+ "special": true
2139
+ },
2140
+ "151910": {
2141
+ "content": "|<EXTRA_TOKENS_264>|",
2142
+ "lstrip": false,
2143
+ "normalized": false,
2144
+ "rstrip": false,
2145
+ "single_word": false,
2146
+ "special": true
2147
+ },
2148
+ "151911": {
2149
+ "content": "|<EXTRA_TOKENS_265>|",
2150
+ "lstrip": false,
2151
+ "normalized": false,
2152
+ "rstrip": false,
2153
+ "single_word": false,
2154
+ "special": true
2155
+ },
2156
+ "151912": {
2157
+ "content": "|<EXTRA_TOKENS_266>|",
2158
+ "lstrip": false,
2159
+ "normalized": false,
2160
+ "rstrip": false,
2161
+ "single_word": false,
2162
+ "special": true
2163
+ },
2164
+ "151913": {
2165
+ "content": "|<EXTRA_TOKENS_267>|",
2166
+ "lstrip": false,
2167
+ "normalized": false,
2168
+ "rstrip": false,
2169
+ "single_word": false,
2170
+ "special": true
2171
+ },
2172
+ "151914": {
2173
+ "content": "|<EXTRA_TOKENS_268>|",
2174
+ "lstrip": false,
2175
+ "normalized": false,
2176
+ "rstrip": false,
2177
+ "single_word": false,
2178
+ "special": true
2179
+ },
2180
+ "151915": {
2181
+ "content": "|<EXTRA_TOKENS_269>|",
2182
+ "lstrip": false,
2183
+ "normalized": false,
2184
+ "rstrip": false,
2185
+ "single_word": false,
2186
+ "special": true
2187
+ },
2188
+ "151916": {
2189
+ "content": "|<EXTRA_TOKENS_270>|",
2190
+ "lstrip": false,
2191
+ "normalized": false,
2192
+ "rstrip": false,
2193
+ "single_word": false,
2194
+ "special": true
2195
+ },
2196
+ "151917": {
2197
+ "content": "|<EXTRA_TOKENS_271>|",
2198
+ "lstrip": false,
2199
+ "normalized": false,
2200
+ "rstrip": false,
2201
+ "single_word": false,
2202
+ "special": true
2203
+ },
2204
+ "151918": {
2205
+ "content": "|<EXTRA_TOKENS_272>|",
2206
+ "lstrip": false,
2207
+ "normalized": false,
2208
+ "rstrip": false,
2209
+ "single_word": false,
2210
+ "special": true
2211
+ },
2212
+ "151919": {
2213
+ "content": "|<EXTRA_TOKENS_273>|",
2214
+ "lstrip": false,
2215
+ "normalized": false,
2216
+ "rstrip": false,
2217
+ "single_word": false,
2218
+ "special": true
2219
+ },
2220
+ "151920": {
2221
+ "content": "|<EXTRA_TOKENS_274>|",
2222
+ "lstrip": false,
2223
+ "normalized": false,
2224
+ "rstrip": false,
2225
+ "single_word": false,
2226
+ "special": true
2227
+ },
2228
+ "151921": {
2229
+ "content": "|<EXTRA_TOKENS_275>|",
2230
+ "lstrip": false,
2231
+ "normalized": false,
2232
+ "rstrip": false,
2233
+ "single_word": false,
2234
+ "special": true
2235
+ },
2236
+ "151922": {
2237
+ "content": "|<EXTRA_TOKENS_276>|",
2238
+ "lstrip": false,
2239
+ "normalized": false,
2240
+ "rstrip": false,
2241
+ "single_word": false,
2242
+ "special": true
2243
+ },
2244
+ "151923": {
2245
+ "content": "|<EXTRA_TOKENS_277>|",
2246
+ "lstrip": false,
2247
+ "normalized": false,
2248
+ "rstrip": false,
2249
+ "single_word": false,
2250
+ "special": true
2251
+ },
2252
+ "151924": {
2253
+ "content": "|<EXTRA_TOKENS_278>|",
2254
+ "lstrip": false,
2255
+ "normalized": false,
2256
+ "rstrip": false,
2257
+ "single_word": false,
2258
+ "special": true
2259
+ },
2260
+ "151925": {
2261
+ "content": "|<EXTRA_TOKENS_279>|",
2262
+ "lstrip": false,
2263
+ "normalized": false,
2264
+ "rstrip": false,
2265
+ "single_word": false,
2266
+ "special": true
2267
+ },
2268
+ "151926": {
2269
+ "content": "|<EXTRA_TOKENS_280>|",
2270
+ "lstrip": false,
2271
+ "normalized": false,
2272
+ "rstrip": false,
2273
+ "single_word": false,
2274
+ "special": true
2275
+ },
2276
+ "151927": {
2277
+ "content": "|<EXTRA_TOKENS_281>|",
2278
+ "lstrip": false,
2279
+ "normalized": false,
2280
+ "rstrip": false,
2281
+ "single_word": false,
2282
+ "special": true
2283
+ },
2284
+ "151928": {
2285
+ "content": "|<EXTRA_TOKENS_282>|",
2286
+ "lstrip": false,
2287
+ "normalized": false,
2288
+ "rstrip": false,
2289
+ "single_word": false,
2290
+ "special": true
2291
+ },
2292
+ "151929": {
2293
+ "content": "|<EXTRA_TOKENS_283>|",
2294
+ "lstrip": false,
2295
+ "normalized": false,
2296
+ "rstrip": false,
2297
+ "single_word": false,
2298
+ "special": true
2299
+ },
2300
+ "151930": {
2301
+ "content": "|<EXTRA_TOKENS_284>|",
2302
+ "lstrip": false,
2303
+ "normalized": false,
2304
+ "rstrip": false,
2305
+ "single_word": false,
2306
+ "special": true
2307
+ },
2308
+ "151931": {
2309
+ "content": "|<EXTRA_TOKENS_285>|",
2310
+ "lstrip": false,
2311
+ "normalized": false,
2312
+ "rstrip": false,
2313
+ "single_word": false,
2314
+ "special": true
2315
+ },
2316
+ "151932": {
2317
+ "content": "|<EXTRA_TOKENS_286>|",
2318
+ "lstrip": false,
2319
+ "normalized": false,
2320
+ "rstrip": false,
2321
+ "single_word": false,
2322
+ "special": true
2323
+ },
2324
+ "151933": {
2325
+ "content": "|<EXTRA_TOKENS_287>|",
2326
+ "lstrip": false,
2327
+ "normalized": false,
2328
+ "rstrip": false,
2329
+ "single_word": false,
2330
+ "special": true
2331
+ },
2332
+ "151934": {
2333
+ "content": "|<EXTRA_TOKENS_288>|",
2334
+ "lstrip": false,
2335
+ "normalized": false,
2336
+ "rstrip": false,
2337
+ "single_word": false,
2338
+ "special": true
2339
+ },
2340
+ "151935": {
2341
+ "content": "|<EXTRA_TOKENS_289>|",
2342
+ "lstrip": false,
2343
+ "normalized": false,
2344
+ "rstrip": false,
2345
+ "single_word": false,
2346
+ "special": true
2347
+ },
2348
+ "151936": {
2349
+ "content": "|<EXTRA_TOKENS_290>|",
2350
+ "lstrip": false,
2351
+ "normalized": false,
2352
+ "rstrip": false,
2353
+ "single_word": false,
2354
+ "special": true
2355
+ },
2356
+ "151937": {
2357
+ "content": "|<EXTRA_TOKENS_291>|",
2358
+ "lstrip": false,
2359
+ "normalized": false,
2360
+ "rstrip": false,
2361
+ "single_word": false,
2362
+ "special": true
2363
+ },
2364
+ "151938": {
2365
+ "content": "|<EXTRA_TOKENS_292>|",
2366
+ "lstrip": false,
2367
+ "normalized": false,
2368
+ "rstrip": false,
2369
+ "single_word": false,
2370
+ "special": true
2371
+ },
2372
+ "151939": {
2373
+ "content": "|<EXTRA_TOKENS_293>|",
2374
+ "lstrip": false,
2375
+ "normalized": false,
2376
+ "rstrip": false,
2377
+ "single_word": false,
2378
+ "special": true
2379
+ },
2380
+ "151940": {
2381
+ "content": "|<EXTRA_TOKENS_294>|",
2382
+ "lstrip": false,
2383
+ "normalized": false,
2384
+ "rstrip": false,
2385
+ "single_word": false,
2386
+ "special": true
2387
+ },
2388
+ "151941": {
2389
+ "content": "|<EXTRA_TOKENS_295>|",
2390
+ "lstrip": false,
2391
+ "normalized": false,
2392
+ "rstrip": false,
2393
+ "single_word": false,
2394
+ "special": true
2395
+ },
2396
+ "151942": {
2397
+ "content": "|<EXTRA_TOKENS_296>|",
2398
+ "lstrip": false,
2399
+ "normalized": false,
2400
+ "rstrip": false,
2401
+ "single_word": false,
2402
+ "special": true
2403
+ },
2404
+ "151943": {
2405
+ "content": "|<EXTRA_TOKENS_297>|",
2406
+ "lstrip": false,
2407
+ "normalized": false,
2408
+ "rstrip": false,
2409
+ "single_word": false,
2410
+ "special": true
2411
+ },
2412
+ "151944": {
2413
+ "content": "|<EXTRA_TOKENS_298>|",
2414
+ "lstrip": false,
2415
+ "normalized": false,
2416
+ "rstrip": false,
2417
+ "single_word": false,
2418
+ "special": true
2419
+ },
2420
+ "151945": {
2421
+ "content": "|<EXTRA_TOKENS_299>|",
2422
+ "lstrip": false,
2423
+ "normalized": false,
2424
+ "rstrip": false,
2425
+ "single_word": false,
2426
+ "special": true
2427
+ },
2428
+ "151946": {
2429
+ "content": "|<EXTRA_TOKENS_300>|",
2430
+ "lstrip": false,
2431
+ "normalized": false,
2432
+ "rstrip": false,
2433
+ "single_word": false,
2434
+ "special": true
2435
+ },
2436
+ "151947": {
2437
+ "content": "|<EXTRA_TOKENS_301>|",
2438
+ "lstrip": false,
2439
+ "normalized": false,
2440
+ "rstrip": false,
2441
+ "single_word": false,
2442
+ "special": true
2443
+ },
2444
+ "151948": {
2445
+ "content": "|<EXTRA_TOKENS_302>|",
2446
+ "lstrip": false,
2447
+ "normalized": false,
2448
+ "rstrip": false,
2449
+ "single_word": false,
2450
+ "special": true
2451
+ },
2452
+ "151949": {
2453
+ "content": "|<EXTRA_TOKENS_303>|",
2454
+ "lstrip": false,
2455
+ "normalized": false,
2456
+ "rstrip": false,
2457
+ "single_word": false,
2458
+ "special": true
2459
+ },
2460
+ "151950": {
2461
+ "content": "|<EXTRA_TOKENS_304>|",
2462
+ "lstrip": false,
2463
+ "normalized": false,
2464
+ "rstrip": false,
2465
+ "single_word": false,
2466
+ "special": true
2467
+ },
2468
+ "151951": {
2469
+ "content": "|<EXTRA_TOKENS_305>|",
2470
+ "lstrip": false,
2471
+ "normalized": false,
2472
+ "rstrip": false,
2473
+ "single_word": false,
2474
+ "special": true
2475
+ },
2476
+ "151952": {
2477
+ "content": "|<EXTRA_TOKENS_306>|",
2478
+ "lstrip": false,
2479
+ "normalized": false,
2480
+ "rstrip": false,
2481
+ "single_word": false,
2482
+ "special": true
2483
+ },
2484
+ "151953": {
2485
+ "content": "|<EXTRA_TOKENS_307>|",
2486
+ "lstrip": false,
2487
+ "normalized": false,
2488
+ "rstrip": false,
2489
+ "single_word": false,
2490
+ "special": true
2491
+ },
2492
+ "151954": {
2493
+ "content": "|<EXTRA_TOKENS_308>|",
2494
+ "lstrip": false,
2495
+ "normalized": false,
2496
+ "rstrip": false,
2497
+ "single_word": false,
2498
+ "special": true
2499
+ },
2500
+ "151955": {
2501
+ "content": "|<EXTRA_TOKENS_309>|",
2502
+ "lstrip": false,
2503
+ "normalized": false,
2504
+ "rstrip": false,
2505
+ "single_word": false,
2506
+ "special": true
2507
+ },
2508
+ "151956": {
2509
+ "content": "|<EXTRA_TOKENS_310>|",
2510
+ "lstrip": false,
2511
+ "normalized": false,
2512
+ "rstrip": false,
2513
+ "single_word": false,
2514
+ "special": true
2515
+ },
2516
+ "151957": {
2517
+ "content": "|<EXTRA_TOKENS_311>|",
2518
+ "lstrip": false,
2519
+ "normalized": false,
2520
+ "rstrip": false,
2521
+ "single_word": false,
2522
+ "special": true
2523
+ },
2524
+ "151958": {
2525
+ "content": "|<EXTRA_TOKENS_312>|",
2526
+ "lstrip": false,
2527
+ "normalized": false,
2528
+ "rstrip": false,
2529
+ "single_word": false,
2530
+ "special": true
2531
+ },
2532
+ "151959": {
2533
+ "content": "|<EXTRA_TOKENS_313>|",
2534
+ "lstrip": false,
2535
+ "normalized": false,
2536
+ "rstrip": false,
2537
+ "single_word": false,
2538
+ "special": true
2539
+ },
2540
+ "151960": {
2541
+ "content": "|<EXTRA_TOKENS_314>|",
2542
+ "lstrip": false,
2543
+ "normalized": false,
2544
+ "rstrip": false,
2545
+ "single_word": false,
2546
+ "special": true
2547
+ },
2548
+ "151961": {
2549
+ "content": "|<EXTRA_TOKENS_315>|",
2550
+ "lstrip": false,
2551
+ "normalized": false,
2552
+ "rstrip": false,
2553
+ "single_word": false,
2554
+ "special": true
2555
+ },
2556
+ "151962": {
2557
+ "content": "|<EXTRA_TOKENS_316>|",
2558
+ "lstrip": false,
2559
+ "normalized": false,
2560
+ "rstrip": false,
2561
+ "single_word": false,
2562
+ "special": true
2563
+ },
2564
+ "151963": {
2565
+ "content": "|<EXTRA_TOKENS_317>|",
2566
+ "lstrip": false,
2567
+ "normalized": false,
2568
+ "rstrip": false,
2569
+ "single_word": false,
2570
+ "special": true
2571
+ },
2572
+ "151964": {
2573
+ "content": "|<EXTRA_TOKENS_318>|",
2574
+ "lstrip": false,
2575
+ "normalized": false,
2576
+ "rstrip": false,
2577
+ "single_word": false,
2578
+ "special": true
2579
+ },
2580
+ "151965": {
2581
+ "content": "|<EXTRA_TOKENS_319>|",
2582
+ "lstrip": false,
2583
+ "normalized": false,
2584
+ "rstrip": false,
2585
+ "single_word": false,
2586
+ "special": true
2587
+ },
2588
+ "151966": {
2589
+ "content": "|<EXTRA_TOKENS_320>|",
2590
+ "lstrip": false,
2591
+ "normalized": false,
2592
+ "rstrip": false,
2593
+ "single_word": false,
2594
+ "special": true
2595
+ },
2596
+ "151967": {
2597
+ "content": "|<EXTRA_TOKENS_321>|",
2598
+ "lstrip": false,
2599
+ "normalized": false,
2600
+ "rstrip": false,
2601
+ "single_word": false,
2602
+ "special": true
2603
+ },
2604
+ "151968": {
2605
+ "content": "|<EXTRA_TOKENS_322>|",
2606
+ "lstrip": false,
2607
+ "normalized": false,
2608
+ "rstrip": false,
2609
+ "single_word": false,
2610
+ "special": true
2611
+ },
2612
+ "151969": {
2613
+ "content": "|<EXTRA_TOKENS_323>|",
2614
+ "lstrip": false,
2615
+ "normalized": false,
2616
+ "rstrip": false,
2617
+ "single_word": false,
2618
+ "special": true
2619
+ },
2620
+ "151970": {
2621
+ "content": "|<EXTRA_TOKENS_324>|",
2622
+ "lstrip": false,
2623
+ "normalized": false,
2624
+ "rstrip": false,
2625
+ "single_word": false,
2626
+ "special": true
2627
+ },
2628
+ "151971": {
2629
+ "content": "|<EXTRA_TOKENS_325>|",
2630
+ "lstrip": false,
2631
+ "normalized": false,
2632
+ "rstrip": false,
2633
+ "single_word": false,
2634
+ "special": true
2635
+ },
2636
+ "151972": {
2637
+ "content": "|<EXTRA_TOKENS_326>|",
2638
+ "lstrip": false,
2639
+ "normalized": false,
2640
+ "rstrip": false,
2641
+ "single_word": false,
2642
+ "special": true
2643
+ },
2644
+ "151973": {
2645
+ "content": "|<EXTRA_TOKENS_327>|",
2646
+ "lstrip": false,
2647
+ "normalized": false,
2648
+ "rstrip": false,
2649
+ "single_word": false,
2650
+ "special": true
2651
+ },
2652
+ "151974": {
2653
+ "content": "|<EXTRA_TOKENS_328>|",
2654
+ "lstrip": false,
2655
+ "normalized": false,
2656
+ "rstrip": false,
2657
+ "single_word": false,
2658
+ "special": true
2659
+ },
2660
+ "151975": {
2661
+ "content": "|<EXTRA_TOKENS_329>|",
2662
+ "lstrip": false,
2663
+ "normalized": false,
2664
+ "rstrip": false,
2665
+ "single_word": false,
2666
+ "special": true
2667
+ },
2668
+ "151976": {
2669
+ "content": "|<EXTRA_TOKENS_330>|",
2670
+ "lstrip": false,
2671
+ "normalized": false,
2672
+ "rstrip": false,
2673
+ "single_word": false,
2674
+ "special": true
2675
+ },
2676
+ "151977": {
2677
+ "content": "|<EXTRA_TOKENS_331>|",
2678
+ "lstrip": false,
2679
+ "normalized": false,
2680
+ "rstrip": false,
2681
+ "single_word": false,
2682
+ "special": true
2683
+ },
2684
+ "151978": {
2685
+ "content": "|<EXTRA_TOKENS_332>|",
2686
+ "lstrip": false,
2687
+ "normalized": false,
2688
+ "rstrip": false,
2689
+ "single_word": false,
2690
+ "special": true
2691
+ },
2692
+ "151979": {
2693
+ "content": "|<EXTRA_TOKENS_333>|",
2694
+ "lstrip": false,
2695
+ "normalized": false,
2696
+ "rstrip": false,
2697
+ "single_word": false,
2698
+ "special": true
2699
+ },
2700
+ "151980": {
2701
+ "content": "|<EXTRA_TOKENS_334>|",
2702
+ "lstrip": false,
2703
+ "normalized": false,
2704
+ "rstrip": false,
2705
+ "single_word": false,
2706
+ "special": true
2707
+ },
2708
+ "151981": {
2709
+ "content": "|<EXTRA_TOKENS_335>|",
2710
+ "lstrip": false,
2711
+ "normalized": false,
2712
+ "rstrip": false,
2713
+ "single_word": false,
2714
+ "special": true
2715
+ },
2716
+ "151982": {
2717
+ "content": "|<EXTRA_TOKENS_336>|",
2718
+ "lstrip": false,
2719
+ "normalized": false,
2720
+ "rstrip": false,
2721
+ "single_word": false,
2722
+ "special": true
2723
+ },
2724
+ "151983": {
2725
+ "content": "|<EXTRA_TOKENS_337>|",
2726
+ "lstrip": false,
2727
+ "normalized": false,
2728
+ "rstrip": false,
2729
+ "single_word": false,
2730
+ "special": true
2731
+ },
2732
+ "151984": {
2733
+ "content": "|<EXTRA_TOKENS_338>|",
2734
+ "lstrip": false,
2735
+ "normalized": false,
2736
+ "rstrip": false,
2737
+ "single_word": false,
2738
+ "special": true
2739
+ },
2740
+ "151985": {
2741
+ "content": "|<EXTRA_TOKENS_339>|",
2742
+ "lstrip": false,
2743
+ "normalized": false,
2744
+ "rstrip": false,
2745
+ "single_word": false,
2746
+ "special": true
2747
+ },
2748
+ "151986": {
2749
+ "content": "|<EXTRA_TOKENS_340>|",
2750
+ "lstrip": false,
2751
+ "normalized": false,
2752
+ "rstrip": false,
2753
+ "single_word": false,
2754
+ "special": true
2755
+ },
2756
+ "151987": {
2757
+ "content": "|<EXTRA_TOKENS_341>|",
2758
+ "lstrip": false,
2759
+ "normalized": false,
2760
+ "rstrip": false,
2761
+ "single_word": false,
2762
+ "special": true
2763
+ },
2764
+ "151988": {
2765
+ "content": "|<EXTRA_TOKENS_342>|",
2766
+ "lstrip": false,
2767
+ "normalized": false,
2768
+ "rstrip": false,
2769
+ "single_word": false,
2770
+ "special": true
2771
+ },
2772
+ "151989": {
2773
+ "content": "|<EXTRA_TOKENS_343>|",
2774
+ "lstrip": false,
2775
+ "normalized": false,
2776
+ "rstrip": false,
2777
+ "single_word": false,
2778
+ "special": true
2779
+ },
2780
+ "151990": {
2781
+ "content": "|<EXTRA_TOKENS_344>|",
2782
+ "lstrip": false,
2783
+ "normalized": false,
2784
+ "rstrip": false,
2785
+ "single_word": false,
2786
+ "special": true
2787
+ },
2788
+ "151991": {
2789
+ "content": "|<EXTRA_TOKENS_345>|",
2790
+ "lstrip": false,
2791
+ "normalized": false,
2792
+ "rstrip": false,
2793
+ "single_word": false,
2794
+ "special": true
2795
+ },
2796
+ "151992": {
2797
+ "content": "|<EXTRA_TOKENS_346>|",
2798
+ "lstrip": false,
2799
+ "normalized": false,
2800
+ "rstrip": false,
2801
+ "single_word": false,
2802
+ "special": true
2803
+ },
2804
+ "151993": {
2805
+ "content": "|<EXTRA_TOKENS_347>|",
2806
+ "lstrip": false,
2807
+ "normalized": false,
2808
+ "rstrip": false,
2809
+ "single_word": false,
2810
+ "special": true
2811
+ },
2812
+ "151994": {
2813
+ "content": "|<EXTRA_TOKENS_348>|",
2814
+ "lstrip": false,
2815
+ "normalized": false,
2816
+ "rstrip": false,
2817
+ "single_word": false,
2818
+ "special": true
2819
+ },
2820
+ "151995": {
2821
+ "content": "|<EXTRA_TOKENS_349>|",
2822
+ "lstrip": false,
2823
+ "normalized": false,
2824
+ "rstrip": false,
2825
+ "single_word": false,
2826
+ "special": true
2827
+ },
2828
+ "151996": {
2829
+ "content": "|<EXTRA_TOKENS_350>|",
2830
+ "lstrip": false,
2831
+ "normalized": false,
2832
+ "rstrip": false,
2833
+ "single_word": false,
2834
+ "special": true
2835
+ },
2836
+ "151997": {
2837
+ "content": "|<EXTRA_TOKENS_351>|",
2838
+ "lstrip": false,
2839
+ "normalized": false,
2840
+ "rstrip": false,
2841
+ "single_word": false,
2842
+ "special": true
2843
+ },
2844
+ "151998": {
2845
+ "content": "|<EXTRA_TOKENS_352>|",
2846
+ "lstrip": false,
2847
+ "normalized": false,
2848
+ "rstrip": false,
2849
+ "single_word": false,
2850
+ "special": true
2851
+ },
2852
+ "151999": {
2853
+ "content": "|<EXTRA_TOKENS_353>|",
2854
+ "lstrip": false,
2855
+ "normalized": false,
2856
+ "rstrip": false,
2857
+ "single_word": false,
2858
+ "special": true
2859
+ },
2860
+ "152000": {
2861
+ "content": "|<EXTRA_TOKENS_354>|",
2862
+ "lstrip": false,
2863
+ "normalized": false,
2864
+ "rstrip": false,
2865
+ "single_word": false,
2866
+ "special": true
2867
+ },
2868
+ "152001": {
2869
+ "content": "|<EXTRA_TOKENS_355>|",
2870
+ "lstrip": false,
2871
+ "normalized": false,
2872
+ "rstrip": false,
2873
+ "single_word": false,
2874
+ "special": true
2875
+ },
2876
+ "152002": {
2877
+ "content": "|<EXTRA_TOKENS_356>|",
2878
+ "lstrip": false,
2879
+ "normalized": false,
2880
+ "rstrip": false,
2881
+ "single_word": false,
2882
+ "special": true
2883
+ },
2884
+ "152003": {
2885
+ "content": "|<EXTRA_TOKENS_357>|",
2886
+ "lstrip": false,
2887
+ "normalized": false,
2888
+ "rstrip": false,
2889
+ "single_word": false,
2890
+ "special": true
2891
+ },
2892
+ "152004": {
2893
+ "content": "|<EXTRA_TOKENS_358>|",
2894
+ "lstrip": false,
2895
+ "normalized": false,
2896
+ "rstrip": false,
2897
+ "single_word": false,
2898
+ "special": true
2899
+ },
2900
+ "152005": {
2901
+ "content": "|<EXTRA_TOKENS_359>|",
2902
+ "lstrip": false,
2903
+ "normalized": false,
2904
+ "rstrip": false,
2905
+ "single_word": false,
2906
+ "special": true
2907
+ },
2908
+ "152006": {
2909
+ "content": "|<EXTRA_TOKENS_360>|",
2910
+ "lstrip": false,
2911
+ "normalized": false,
2912
+ "rstrip": false,
2913
+ "single_word": false,
2914
+ "special": true
2915
+ },
2916
+ "152007": {
2917
+ "content": "|<EXTRA_TOKENS_361>|",
2918
+ "lstrip": false,
2919
+ "normalized": false,
2920
+ "rstrip": false,
2921
+ "single_word": false,
2922
+ "special": true
2923
+ },
2924
+ "152008": {
2925
+ "content": "|<EXTRA_TOKENS_362>|",
2926
+ "lstrip": false,
2927
+ "normalized": false,
2928
+ "rstrip": false,
2929
+ "single_word": false,
2930
+ "special": true
2931
+ },
2932
+ "152009": {
2933
+ "content": "|<EXTRA_TOKENS_363>|",
2934
+ "lstrip": false,
2935
+ "normalized": false,
2936
+ "rstrip": false,
2937
+ "single_word": false,
2938
+ "special": true
2939
+ },
2940
+ "152010": {
2941
+ "content": "|<EXTRA_TOKENS_364>|",
2942
+ "lstrip": false,
2943
+ "normalized": false,
2944
+ "rstrip": false,
2945
+ "single_word": false,
2946
+ "special": true
2947
+ },
2948
+ "152011": {
2949
+ "content": "|<EXTRA_TOKENS_365>|",
2950
+ "lstrip": false,
2951
+ "normalized": false,
2952
+ "rstrip": false,
2953
+ "single_word": false,
2954
+ "special": true
2955
+ },
2956
+ "152012": {
2957
+ "content": "|<EXTRA_TOKENS_366>|",
2958
+ "lstrip": false,
2959
+ "normalized": false,
2960
+ "rstrip": false,
2961
+ "single_word": false,
2962
+ "special": true
2963
+ },
2964
+ "152013": {
2965
+ "content": "|<EXTRA_TOKENS_367>|",
2966
+ "lstrip": false,
2967
+ "normalized": false,
2968
+ "rstrip": false,
2969
+ "single_word": false,
2970
+ "special": true
2971
+ },
2972
+ "152014": {
2973
+ "content": "|<EXTRA_TOKENS_368>|",
2974
+ "lstrip": false,
2975
+ "normalized": false,
2976
+ "rstrip": false,
2977
+ "single_word": false,
2978
+ "special": true
2979
+ },
2980
+ "152015": {
2981
+ "content": "|<EXTRA_TOKENS_369>|",
2982
+ "lstrip": false,
2983
+ "normalized": false,
2984
+ "rstrip": false,
2985
+ "single_word": false,
2986
+ "special": true
2987
+ },
2988
+ "152016": {
2989
+ "content": "|<EXTRA_TOKENS_370>|",
2990
+ "lstrip": false,
2991
+ "normalized": false,
2992
+ "rstrip": false,
2993
+ "single_word": false,
2994
+ "special": true
2995
+ },
2996
+ "152017": {
2997
+ "content": "|<EXTRA_TOKENS_371>|",
2998
+ "lstrip": false,
2999
+ "normalized": false,
3000
+ "rstrip": false,
3001
+ "single_word": false,
3002
+ "special": true
3003
+ },
3004
+ "152018": {
3005
+ "content": "|<EXTRA_TOKENS_372>|",
3006
+ "lstrip": false,
3007
+ "normalized": false,
3008
+ "rstrip": false,
3009
+ "single_word": false,
3010
+ "special": true
3011
+ },
3012
+ "152019": {
3013
+ "content": "|<EXTRA_TOKENS_373>|",
3014
+ "lstrip": false,
3015
+ "normalized": false,
3016
+ "rstrip": false,
3017
+ "single_word": false,
3018
+ "special": true
3019
+ },
3020
+ "152020": {
3021
+ "content": "|<EXTRA_TOKENS_374>|",
3022
+ "lstrip": false,
3023
+ "normalized": false,
3024
+ "rstrip": false,
3025
+ "single_word": false,
3026
+ "special": true
3027
+ },
3028
+ "152021": {
3029
+ "content": "|<EXTRA_TOKENS_375>|",
3030
+ "lstrip": false,
3031
+ "normalized": false,
3032
+ "rstrip": false,
3033
+ "single_word": false,
3034
+ "special": true
3035
+ },
3036
+ "152022": {
3037
+ "content": "|<EXTRA_TOKENS_376>|",
3038
+ "lstrip": false,
3039
+ "normalized": false,
3040
+ "rstrip": false,
3041
+ "single_word": false,
3042
+ "special": true
3043
+ },
3044
+ "152023": {
3045
+ "content": "|<EXTRA_TOKENS_377>|",
3046
+ "lstrip": false,
3047
+ "normalized": false,
3048
+ "rstrip": false,
3049
+ "single_word": false,
3050
+ "special": true
3051
+ },
3052
+ "152024": {
3053
+ "content": "|<EXTRA_TOKENS_378>|",
3054
+ "lstrip": false,
3055
+ "normalized": false,
3056
+ "rstrip": false,
3057
+ "single_word": false,
3058
+ "special": true
3059
+ },
3060
+ "152025": {
3061
+ "content": "|<EXTRA_TOKENS_379>|",
3062
+ "lstrip": false,
3063
+ "normalized": false,
3064
+ "rstrip": false,
3065
+ "single_word": false,
3066
+ "special": true
3067
+ },
3068
+ "152026": {
3069
+ "content": "|<EXTRA_TOKENS_380>|",
3070
+ "lstrip": false,
3071
+ "normalized": false,
3072
+ "rstrip": false,
3073
+ "single_word": false,
3074
+ "special": true
3075
+ },
3076
+ "152027": {
3077
+ "content": "|<EXTRA_TOKENS_381>|",
3078
+ "lstrip": false,
3079
+ "normalized": false,
3080
+ "rstrip": false,
3081
+ "single_word": false,
3082
+ "special": true
3083
+ },
3084
+ "152028": {
3085
+ "content": "|<EXTRA_TOKENS_382>|",
3086
+ "lstrip": false,
3087
+ "normalized": false,
3088
+ "rstrip": false,
3089
+ "single_word": false,
3090
+ "special": true
3091
+ },
3092
+ "152029": {
3093
+ "content": "|<EXTRA_TOKENS_383>|",
3094
+ "lstrip": false,
3095
+ "normalized": false,
3096
+ "rstrip": false,
3097
+ "single_word": false,
3098
+ "special": true
3099
+ },
3100
+ "152030": {
3101
+ "content": "|<EXTRA_TOKENS_384>|",
3102
+ "lstrip": false,
3103
+ "normalized": false,
3104
+ "rstrip": false,
3105
+ "single_word": false,
3106
+ "special": true
3107
+ },
3108
+ "152031": {
3109
+ "content": "|<EXTRA_TOKENS_385>|",
3110
+ "lstrip": false,
3111
+ "normalized": false,
3112
+ "rstrip": false,
3113
+ "single_word": false,
3114
+ "special": true
3115
+ },
3116
+ "152032": {
3117
+ "content": "|<EXTRA_TOKENS_386>|",
3118
+ "lstrip": false,
3119
+ "normalized": false,
3120
+ "rstrip": false,
3121
+ "single_word": false,
3122
+ "special": true
3123
+ },
3124
+ "152033": {
3125
+ "content": "|<EXTRA_TOKENS_387>|",
3126
+ "lstrip": false,
3127
+ "normalized": false,
3128
+ "rstrip": false,
3129
+ "single_word": false,
3130
+ "special": true
3131
+ },
3132
+ "152034": {
3133
+ "content": "|<EXTRA_TOKENS_388>|",
3134
+ "lstrip": false,
3135
+ "normalized": false,
3136
+ "rstrip": false,
3137
+ "single_word": false,
3138
+ "special": true
3139
+ },
3140
+ "152035": {
3141
+ "content": "|<EXTRA_TOKENS_389>|",
3142
+ "lstrip": false,
3143
+ "normalized": false,
3144
+ "rstrip": false,
3145
+ "single_word": false,
3146
+ "special": true
3147
+ },
3148
+ "152036": {
3149
+ "content": "|<EXTRA_TOKENS_390>|",
3150
+ "lstrip": false,
3151
+ "normalized": false,
3152
+ "rstrip": false,
3153
+ "single_word": false,
3154
+ "special": true
3155
+ },
3156
+ "152037": {
3157
+ "content": "|<EXTRA_TOKENS_391>|",
3158
+ "lstrip": false,
3159
+ "normalized": false,
3160
+ "rstrip": false,
3161
+ "single_word": false,
3162
+ "special": true
3163
+ },
3164
+ "152038": {
3165
+ "content": "|<EXTRA_TOKENS_392>|",
3166
+ "lstrip": false,
3167
+ "normalized": false,
3168
+ "rstrip": false,
3169
+ "single_word": false,
3170
+ "special": true
3171
+ },
3172
+ "152039": {
3173
+ "content": "|<EXTRA_TOKENS_393>|",
3174
+ "lstrip": false,
3175
+ "normalized": false,
3176
+ "rstrip": false,
3177
+ "single_word": false,
3178
+ "special": true
3179
+ },
3180
+ "152040": {
3181
+ "content": "|<EXTRA_TOKENS_394>|",
3182
+ "lstrip": false,
3183
+ "normalized": false,
3184
+ "rstrip": false,
3185
+ "single_word": false,
3186
+ "special": true
3187
+ },
3188
+ "152041": {
3189
+ "content": "|<EXTRA_TOKENS_395>|",
3190
+ "lstrip": false,
3191
+ "normalized": false,
3192
+ "rstrip": false,
3193
+ "single_word": false,
3194
+ "special": true
3195
+ },
3196
+ "152042": {
3197
+ "content": "|<EXTRA_TOKENS_396>|",
3198
+ "lstrip": false,
3199
+ "normalized": false,
3200
+ "rstrip": false,
3201
+ "single_word": false,
3202
+ "special": true
3203
+ },
3204
+ "152043": {
3205
+ "content": "|<EXTRA_TOKENS_397>|",
3206
+ "lstrip": false,
3207
+ "normalized": false,
3208
+ "rstrip": false,
3209
+ "single_word": false,
3210
+ "special": true
3211
+ },
3212
+ "152044": {
3213
+ "content": "|<EXTRA_TOKENS_398>|",
3214
+ "lstrip": false,
3215
+ "normalized": false,
3216
+ "rstrip": false,
3217
+ "single_word": false,
3218
+ "special": true
3219
+ },
3220
+ "152045": {
3221
+ "content": "|<EXTRA_TOKENS_399>|",
3222
+ "lstrip": false,
3223
+ "normalized": false,
3224
+ "rstrip": false,
3225
+ "single_word": false,
3226
+ "special": true
3227
+ },
3228
+ "152046": {
3229
+ "content": "|<EXTRA_TOKENS_400>|",
3230
+ "lstrip": false,
3231
+ "normalized": false,
3232
+ "rstrip": false,
3233
+ "single_word": false,
3234
+ "special": true
3235
+ },
3236
+ "152047": {
3237
+ "content": "|<EXTRA_TOKENS_401>|",
3238
+ "lstrip": false,
3239
+ "normalized": false,
3240
+ "rstrip": false,
3241
+ "single_word": false,
3242
+ "special": true
3243
+ },
3244
+ "152048": {
3245
+ "content": "|<EXTRA_TOKENS_402>|",
3246
+ "lstrip": false,
3247
+ "normalized": false,
3248
+ "rstrip": false,
3249
+ "single_word": false,
3250
+ "special": true
3251
+ },
3252
+ "152049": {
3253
+ "content": "|<EXTRA_TOKENS_403>|",
3254
+ "lstrip": false,
3255
+ "normalized": false,
3256
+ "rstrip": false,
3257
+ "single_word": false,
3258
+ "special": true
3259
+ },
3260
+ "152050": {
3261
+ "content": "|<EXTRA_TOKENS_404>|",
3262
+ "lstrip": false,
3263
+ "normalized": false,
3264
+ "rstrip": false,
3265
+ "single_word": false,
3266
+ "special": true
3267
+ },
3268
+ "152051": {
3269
+ "content": "|<EXTRA_TOKENS_405>|",
3270
+ "lstrip": false,
3271
+ "normalized": false,
3272
+ "rstrip": false,
3273
+ "single_word": false,
3274
+ "special": true
3275
+ },
3276
+ "152052": {
3277
+ "content": "|<EXTRA_TOKENS_406>|",
3278
+ "lstrip": false,
3279
+ "normalized": false,
3280
+ "rstrip": false,
3281
+ "single_word": false,
3282
+ "special": true
3283
+ },
3284
+ "152053": {
3285
+ "content": "|<EXTRA_TOKENS_407>|",
3286
+ "lstrip": false,
3287
+ "normalized": false,
3288
+ "rstrip": false,
3289
+ "single_word": false,
3290
+ "special": true
3291
+ },
3292
+ "152054": {
3293
+ "content": "|<EXTRA_TOKENS_408>|",
3294
+ "lstrip": false,
3295
+ "normalized": false,
3296
+ "rstrip": false,
3297
+ "single_word": false,
3298
+ "special": true
3299
+ },
3300
+ "152055": {
3301
+ "content": "|<EXTRA_TOKENS_409>|",
3302
+ "lstrip": false,
3303
+ "normalized": false,
3304
+ "rstrip": false,
3305
+ "single_word": false,
3306
+ "special": true
3307
+ },
3308
+ "152056": {
3309
+ "content": "|<EXTRA_TOKENS_410>|",
3310
+ "lstrip": false,
3311
+ "normalized": false,
3312
+ "rstrip": false,
3313
+ "single_word": false,
3314
+ "special": true
3315
+ },
3316
+ "152057": {
3317
+ "content": "|<EXTRA_TOKENS_411>|",
3318
+ "lstrip": false,
3319
+ "normalized": false,
3320
+ "rstrip": false,
3321
+ "single_word": false,
3322
+ "special": true
3323
+ },
3324
+ "152058": {
3325
+ "content": "|<EXTRA_TOKENS_412>|",
3326
+ "lstrip": false,
3327
+ "normalized": false,
3328
+ "rstrip": false,
3329
+ "single_word": false,
3330
+ "special": true
3331
+ },
3332
+ "152059": {
3333
+ "content": "|<EXTRA_TOKENS_413>|",
3334
+ "lstrip": false,
3335
+ "normalized": false,
3336
+ "rstrip": false,
3337
+ "single_word": false,
3338
+ "special": true
3339
+ },
3340
+ "152060": {
3341
+ "content": "|<EXTRA_TOKENS_414>|",
3342
+ "lstrip": false,
3343
+ "normalized": false,
3344
+ "rstrip": false,
3345
+ "single_word": false,
3346
+ "special": true
3347
+ },
3348
+ "152061": {
3349
+ "content": "|<EXTRA_TOKENS_415>|",
3350
+ "lstrip": false,
3351
+ "normalized": false,
3352
+ "rstrip": false,
3353
+ "single_word": false,
3354
+ "special": true
3355
+ },
3356
+ "152062": {
3357
+ "content": "|<EXTRA_TOKENS_416>|",
3358
+ "lstrip": false,
3359
+ "normalized": false,
3360
+ "rstrip": false,
3361
+ "single_word": false,
3362
+ "special": true
3363
+ },
3364
+ "152063": {
3365
+ "content": "|<EXTRA_TOKENS_417>|",
3366
+ "lstrip": false,
3367
+ "normalized": false,
3368
+ "rstrip": false,
3369
+ "single_word": false,
3370
+ "special": true
3371
+ },
3372
+ "152064": {
3373
+ "content": "<im_start>",
3374
+ "lstrip": false,
3375
+ "normalized": false,
3376
+ "rstrip": false,
3377
+ "single_word": false,
3378
+ "special": true
3379
+ },
3380
+ "152065": {
3381
+ "content": "<im_end>",
3382
+ "lstrip": false,
3383
+ "normalized": false,
3384
+ "rstrip": false,
3385
+ "single_word": false,
3386
+ "special": true
3387
+ },
3388
+ "152066": {
3389
+ "content": "<im_patch>",
3390
+ "lstrip": false,
3391
+ "normalized": false,
3392
+ "rstrip": false,
3393
+ "single_word": false,
3394
+ "special": true
3395
+ },
3396
+ "152067": {
3397
+ "content": "<im_col>",
3398
+ "lstrip": false,
3399
+ "normalized": false,
3400
+ "rstrip": false,
3401
+ "single_word": false,
3402
+ "special": true
3403
+ },
3404
+ "152068": {
3405
+ "content": "<|image|>",
3406
+ "lstrip": false,
3407
+ "normalized": false,
3408
+ "rstrip": false,
3409
+ "single_word": false,
3410
+ "special": true
3411
+ }
3412
+ },
3413
+ "additional_special_tokens": [
3414
+ "|<EXTRA_TOKENS_0>|",
3415
+ "|<EXTRA_TOKENS_1>|",
3416
+ "|<EXTRA_TOKENS_2>|",
3417
+ "|<EXTRA_TOKENS_3>|",
3418
+ "|<EXTRA_TOKENS_4>|",
3419
+ "|<EXTRA_TOKENS_5>|",
3420
+ "|<EXTRA_TOKENS_6>|",
3421
+ "|<EXTRA_TOKENS_7>|",
3422
+ "|<EXTRA_TOKENS_8>|",
3423
+ "|<EXTRA_TOKENS_9>|",
3424
+ "|<EXTRA_TOKENS_10>|",
3425
+ "|<EXTRA_TOKENS_11>|",
3426
+ "|<EXTRA_TOKENS_12>|",
3427
+ "|<EXTRA_TOKENS_13>|",
3428
+ "|<EXTRA_TOKENS_14>|",
3429
+ "|<EXTRA_TOKENS_15>|",
3430
+ "|<EXTRA_TOKENS_16>|",
3431
+ "|<EXTRA_TOKENS_17>|",
3432
+ "|<EXTRA_TOKENS_18>|",
3433
+ "|<EXTRA_TOKENS_19>|",
3434
+ "|<EXTRA_TOKENS_20>|",
3435
+ "|<EXTRA_TOKENS_21>|",
3436
+ "|<EXTRA_TOKENS_22>|",
3437
+ "|<EXTRA_TOKENS_23>|",
3438
+ "|<EXTRA_TOKENS_24>|",
3439
+ "|<EXTRA_TOKENS_25>|",
3440
+ "|<EXTRA_TOKENS_26>|",
3441
+ "|<EXTRA_TOKENS_27>|",
3442
+ "|<EXTRA_TOKENS_28>|",
3443
+ "|<EXTRA_TOKENS_29>|",
3444
+ "|<EXTRA_TOKENS_30>|",
3445
+ "|<EXTRA_TOKENS_31>|",
3446
+ "|<EXTRA_TOKENS_32>|",
3447
+ "|<EXTRA_TOKENS_33>|",
3448
+ "|<EXTRA_TOKENS_34>|",
3449
+ "|<EXTRA_TOKENS_35>|",
3450
+ "|<EXTRA_TOKENS_36>|",
3451
+ "|<EXTRA_TOKENS_37>|",
3452
+ "|<EXTRA_TOKENS_38>|",
3453
+ "|<EXTRA_TOKENS_39>|",
3454
+ "|<EXTRA_TOKENS_40>|",
3455
+ "|<EXTRA_TOKENS_41>|",
3456
+ "|<EXTRA_TOKENS_42>|",
3457
+ "|<EXTRA_TOKENS_43>|",
3458
+ "|<EXTRA_TOKENS_44>|",
3459
+ "|<EXTRA_TOKENS_45>|",
3460
+ "|<EXTRA_TOKENS_46>|",
3461
+ "|<EXTRA_TOKENS_47>|",
3462
+ "|<EXTRA_TOKENS_48>|",
3463
+ "|<EXTRA_TOKENS_49>|",
3464
+ "|<EXTRA_TOKENS_50>|",
3465
+ "|<EXTRA_TOKENS_51>|",
3466
+ "|<EXTRA_TOKENS_52>|",
3467
+ "|<EXTRA_TOKENS_53>|",
3468
+ "|<EXTRA_TOKENS_54>|",
3469
+ "|<EXTRA_TOKENS_55>|",
3470
+ "|<EXTRA_TOKENS_56>|",
3471
+ "|<EXTRA_TOKENS_57>|",
3472
+ "|<EXTRA_TOKENS_58>|",
3473
+ "|<EXTRA_TOKENS_59>|",
3474
+ "|<EXTRA_TOKENS_60>|",
3475
+ "|<EXTRA_TOKENS_61>|",
3476
+ "|<EXTRA_TOKENS_62>|",
3477
+ "|<EXTRA_TOKENS_63>|",
3478
+ "|<EXTRA_TOKENS_64>|",
3479
+ "|<EXTRA_TOKENS_65>|",
3480
+ "|<EXTRA_TOKENS_66>|",
3481
+ "|<EXTRA_TOKENS_67>|",
3482
+ "|<EXTRA_TOKENS_68>|",
3483
+ "|<EXTRA_TOKENS_69>|",
3484
+ "|<EXTRA_TOKENS_70>|",
3485
+ "|<EXTRA_TOKENS_71>|",
3486
+ "|<EXTRA_TOKENS_72>|",
3487
+ "|<EXTRA_TOKENS_73>|",
3488
+ "|<EXTRA_TOKENS_74>|",
3489
+ "|<EXTRA_TOKENS_75>|",
3490
+ "|<EXTRA_TOKENS_76>|",
3491
+ "|<EXTRA_TOKENS_77>|",
3492
+ "|<EXTRA_TOKENS_78>|",
3493
+ "|<EXTRA_TOKENS_79>|",
3494
+ "|<EXTRA_TOKENS_80>|",
3495
+ "|<EXTRA_TOKENS_81>|",
3496
+ "|<EXTRA_TOKENS_82>|",
3497
+ "|<EXTRA_TOKENS_83>|",
3498
+ "|<EXTRA_TOKENS_84>|",
3499
+ "|<EXTRA_TOKENS_85>|",
3500
+ "|<EXTRA_TOKENS_86>|",
3501
+ "|<EXTRA_TOKENS_87>|",
3502
+ "|<EXTRA_TOKENS_88>|",
3503
+ "|<EXTRA_TOKENS_89>|",
3504
+ "|<EXTRA_TOKENS_90>|",
3505
+ "|<EXTRA_TOKENS_91>|",
3506
+ "|<EXTRA_TOKENS_92>|",
3507
+ "|<EXTRA_TOKENS_93>|",
3508
+ "|<EXTRA_TOKENS_94>|",
3509
+ "|<EXTRA_TOKENS_95>|",
3510
+ "|<EXTRA_TOKENS_96>|",
3511
+ "|<EXTRA_TOKENS_97>|",
3512
+ "|<EXTRA_TOKENS_98>|",
3513
+ "|<EXTRA_TOKENS_99>|",
3514
+ "|<EXTRA_TOKENS_100>|",
3515
+ "|<EXTRA_TOKENS_101>|",
3516
+ "|<EXTRA_TOKENS_102>|",
3517
+ "|<EXTRA_TOKENS_103>|",
3518
+ "|<EXTRA_TOKENS_104>|",
3519
+ "|<EXTRA_TOKENS_105>|",
3520
+ "|<EXTRA_TOKENS_106>|",
3521
+ "|<EXTRA_TOKENS_107>|",
3522
+ "|<EXTRA_TOKENS_108>|",
3523
+ "|<EXTRA_TOKENS_109>|",
3524
+ "|<EXTRA_TOKENS_110>|",
3525
+ "|<EXTRA_TOKENS_111>|",
3526
+ "|<EXTRA_TOKENS_112>|",
3527
+ "|<EXTRA_TOKENS_113>|",
3528
+ "|<EXTRA_TOKENS_114>|",
3529
+ "|<EXTRA_TOKENS_115>|",
3530
+ "|<EXTRA_TOKENS_116>|",
3531
+ "|<EXTRA_TOKENS_117>|",
3532
+ "|<EXTRA_TOKENS_118>|",
3533
+ "|<EXTRA_TOKENS_119>|",
3534
+ "|<EXTRA_TOKENS_120>|",
3535
+ "|<EXTRA_TOKENS_121>|",
3536
+ "|<EXTRA_TOKENS_122>|",
3537
+ "|<EXTRA_TOKENS_123>|",
3538
+ "|<EXTRA_TOKENS_124>|",
3539
+ "|<EXTRA_TOKENS_125>|",
3540
+ "|<EXTRA_TOKENS_126>|",
3541
+ "|<EXTRA_TOKENS_127>|",
3542
+ "|<EXTRA_TOKENS_128>|",
3543
+ "|<EXTRA_TOKENS_129>|",
3544
+ "|<EXTRA_TOKENS_130>|",
3545
+ "|<EXTRA_TOKENS_131>|",
3546
+ "|<EXTRA_TOKENS_132>|",
3547
+ "|<EXTRA_TOKENS_133>|",
3548
+ "|<EXTRA_TOKENS_134>|",
3549
+ "|<EXTRA_TOKENS_135>|",
3550
+ "|<EXTRA_TOKENS_136>|",
3551
+ "|<EXTRA_TOKENS_137>|",
3552
+ "|<EXTRA_TOKENS_138>|",
3553
+ "|<EXTRA_TOKENS_139>|",
3554
+ "|<EXTRA_TOKENS_140>|",
3555
+ "|<EXTRA_TOKENS_141>|",
3556
+ "|<EXTRA_TOKENS_142>|",
3557
+ "|<EXTRA_TOKENS_143>|",
3558
+ "|<EXTRA_TOKENS_144>|",
3559
+ "|<EXTRA_TOKENS_145>|",
3560
+ "|<EXTRA_TOKENS_146>|",
3561
+ "|<EXTRA_TOKENS_147>|",
3562
+ "|<EXTRA_TOKENS_148>|",
3563
+ "|<EXTRA_TOKENS_149>|",
3564
+ "|<EXTRA_TOKENS_150>|",
3565
+ "|<EXTRA_TOKENS_151>|",
3566
+ "|<EXTRA_TOKENS_152>|",
3567
+ "|<EXTRA_TOKENS_153>|",
3568
+ "|<EXTRA_TOKENS_154>|",
3569
+ "|<EXTRA_TOKENS_155>|",
3570
+ "|<EXTRA_TOKENS_156>|",
3571
+ "|<EXTRA_TOKENS_157>|",
3572
+ "|<EXTRA_TOKENS_158>|",
3573
+ "|<EXTRA_TOKENS_159>|",
3574
+ "|<EXTRA_TOKENS_160>|",
3575
+ "|<EXTRA_TOKENS_161>|",
3576
+ "|<EXTRA_TOKENS_162>|",
3577
+ "|<EXTRA_TOKENS_163>|",
3578
+ "|<EXTRA_TOKENS_164>|",
3579
+ "|<EXTRA_TOKENS_165>|",
3580
+ "|<EXTRA_TOKENS_166>|",
3581
+ "|<EXTRA_TOKENS_167>|",
3582
+ "|<EXTRA_TOKENS_168>|",
3583
+ "|<EXTRA_TOKENS_169>|",
3584
+ "|<EXTRA_TOKENS_170>|",
3585
+ "|<EXTRA_TOKENS_171>|",
3586
+ "|<EXTRA_TOKENS_172>|",
3587
+ "|<EXTRA_TOKENS_173>|",
3588
+ "|<EXTRA_TOKENS_174>|",
3589
+ "|<EXTRA_TOKENS_175>|",
3590
+ "|<EXTRA_TOKENS_176>|",
3591
+ "|<EXTRA_TOKENS_177>|",
3592
+ "|<EXTRA_TOKENS_178>|",
3593
+ "|<EXTRA_TOKENS_179>|",
3594
+ "|<EXTRA_TOKENS_180>|",
3595
+ "|<EXTRA_TOKENS_181>|",
3596
+ "|<EXTRA_TOKENS_182>|",
3597
+ "|<EXTRA_TOKENS_183>|",
3598
+ "|<EXTRA_TOKENS_184>|",
3599
+ "|<EXTRA_TOKENS_185>|",
3600
+ "|<EXTRA_TOKENS_186>|",
3601
+ "|<EXTRA_TOKENS_187>|",
3602
+ "|<EXTRA_TOKENS_188>|",
3603
+ "|<EXTRA_TOKENS_189>|",
3604
+ "|<EXTRA_TOKENS_190>|",
3605
+ "|<EXTRA_TOKENS_191>|",
3606
+ "|<EXTRA_TOKENS_192>|",
3607
+ "|<EXTRA_TOKENS_193>|",
3608
+ "|<EXTRA_TOKENS_194>|",
3609
+ "|<EXTRA_TOKENS_195>|",
3610
+ "|<EXTRA_TOKENS_196>|",
3611
+ "|<EXTRA_TOKENS_197>|",
3612
+ "|<EXTRA_TOKENS_198>|",
3613
+ "|<EXTRA_TOKENS_199>|",
3614
+ "|<EXTRA_TOKENS_200>|",
3615
+ "|<EXTRA_TOKENS_201>|",
3616
+ "|<EXTRA_TOKENS_202>|",
3617
+ "|<EXTRA_TOKENS_203>|",
3618
+ "|<EXTRA_TOKENS_204>|",
3619
+ "|<EXTRA_TOKENS_205>|",
3620
+ "|<EXTRA_TOKENS_206>|",
3621
+ "|<EXTRA_TOKENS_207>|",
3622
+ "|<EXTRA_TOKENS_208>|",
3623
+ "|<EXTRA_TOKENS_209>|",
3624
+ "|<EXTRA_TOKENS_210>|",
3625
+ "|<EXTRA_TOKENS_211>|",
3626
+ "|<EXTRA_TOKENS_212>|",
3627
+ "|<EXTRA_TOKENS_213>|",
3628
+ "|<EXTRA_TOKENS_214>|",
3629
+ "|<EXTRA_TOKENS_215>|",
3630
+ "|<EXTRA_TOKENS_216>|",
3631
+ "|<EXTRA_TOKENS_217>|",
3632
+ "|<EXTRA_TOKENS_218>|",
3633
+ "|<EXTRA_TOKENS_219>|",
3634
+ "|<EXTRA_TOKENS_220>|",
3635
+ "|<EXTRA_TOKENS_221>|",
3636
+ "|<EXTRA_TOKENS_222>|",
3637
+ "|<EXTRA_TOKENS_223>|",
3638
+ "|<EXTRA_TOKENS_224>|",
3639
+ "|<EXTRA_TOKENS_225>|",
3640
+ "|<EXTRA_TOKENS_226>|",
3641
+ "|<EXTRA_TOKENS_227>|",
3642
+ "|<EXTRA_TOKENS_228>|",
3643
+ "|<EXTRA_TOKENS_229>|",
3644
+ "|<EXTRA_TOKENS_230>|",
3645
+ "|<EXTRA_TOKENS_231>|",
3646
+ "|<EXTRA_TOKENS_232>|",
3647
+ "|<EXTRA_TOKENS_233>|",
3648
+ "|<EXTRA_TOKENS_234>|",
3649
+ "|<EXTRA_TOKENS_235>|",
3650
+ "|<EXTRA_TOKENS_236>|",
3651
+ "|<EXTRA_TOKENS_237>|",
3652
+ "|<EXTRA_TOKENS_238>|",
3653
+ "|<EXTRA_TOKENS_239>|",
3654
+ "|<EXTRA_TOKENS_240>|",
3655
+ "|<EXTRA_TOKENS_241>|",
3656
+ "|<EXTRA_TOKENS_242>|",
3657
+ "|<EXTRA_TOKENS_243>|",
3658
+ "|<EXTRA_TOKENS_244>|",
3659
+ "|<EXTRA_TOKENS_245>|",
3660
+ "|<EXTRA_TOKENS_246>|",
3661
+ "|<EXTRA_TOKENS_247>|",
3662
+ "|<EXTRA_TOKENS_248>|",
3663
+ "|<EXTRA_TOKENS_249>|",
3664
+ "|<EXTRA_TOKENS_250>|",
3665
+ "|<EXTRA_TOKENS_251>|",
3666
+ "|<EXTRA_TOKENS_252>|",
3667
+ "|<EXTRA_TOKENS_253>|",
3668
+ "|<EXTRA_TOKENS_254>|",
3669
+ "|<EXTRA_TOKENS_255>|",
3670
+ "|<EXTRA_TOKENS_256>|",
3671
+ "|<EXTRA_TOKENS_257>|",
3672
+ "|<EXTRA_TOKENS_258>|",
3673
+ "|<EXTRA_TOKENS_259>|",
3674
+ "|<EXTRA_TOKENS_260>|",
3675
+ "|<EXTRA_TOKENS_261>|",
3676
+ "|<EXTRA_TOKENS_262>|",
3677
+ "|<EXTRA_TOKENS_263>|",
3678
+ "|<EXTRA_TOKENS_264>|",
3679
+ "|<EXTRA_TOKENS_265>|",
3680
+ "|<EXTRA_TOKENS_266>|",
3681
+ "|<EXTRA_TOKENS_267>|",
3682
+ "|<EXTRA_TOKENS_268>|",
3683
+ "|<EXTRA_TOKENS_269>|",
3684
+ "|<EXTRA_TOKENS_270>|",
3685
+ "|<EXTRA_TOKENS_271>|",
3686
+ "|<EXTRA_TOKENS_272>|",
3687
+ "|<EXTRA_TOKENS_273>|",
3688
+ "|<EXTRA_TOKENS_274>|",
3689
+ "|<EXTRA_TOKENS_275>|",
3690
+ "|<EXTRA_TOKENS_276>|",
3691
+ "|<EXTRA_TOKENS_277>|",
3692
+ "|<EXTRA_TOKENS_278>|",
3693
+ "|<EXTRA_TOKENS_279>|",
3694
+ "|<EXTRA_TOKENS_280>|",
3695
+ "|<EXTRA_TOKENS_281>|",
3696
+ "|<EXTRA_TOKENS_282>|",
3697
+ "|<EXTRA_TOKENS_283>|",
3698
+ "|<EXTRA_TOKENS_284>|",
3699
+ "|<EXTRA_TOKENS_285>|",
3700
+ "|<EXTRA_TOKENS_286>|",
3701
+ "|<EXTRA_TOKENS_287>|",
3702
+ "|<EXTRA_TOKENS_288>|",
3703
+ "|<EXTRA_TOKENS_289>|",
3704
+ "|<EXTRA_TOKENS_290>|",
3705
+ "|<EXTRA_TOKENS_291>|",
3706
+ "|<EXTRA_TOKENS_292>|",
3707
+ "|<EXTRA_TOKENS_293>|",
3708
+ "|<EXTRA_TOKENS_294>|",
3709
+ "|<EXTRA_TOKENS_295>|",
3710
+ "|<EXTRA_TOKENS_296>|",
3711
+ "|<EXTRA_TOKENS_297>|",
3712
+ "|<EXTRA_TOKENS_298>|",
3713
+ "|<EXTRA_TOKENS_299>|",
3714
+ "|<EXTRA_TOKENS_300>|",
3715
+ "|<EXTRA_TOKENS_301>|",
3716
+ "|<EXTRA_TOKENS_302>|",
3717
+ "|<EXTRA_TOKENS_303>|",
3718
+ "|<EXTRA_TOKENS_304>|",
3719
+ "|<EXTRA_TOKENS_305>|",
3720
+ "|<EXTRA_TOKENS_306>|",
3721
+ "|<EXTRA_TOKENS_307>|",
3722
+ "|<EXTRA_TOKENS_308>|",
3723
+ "|<EXTRA_TOKENS_309>|",
3724
+ "|<EXTRA_TOKENS_310>|",
3725
+ "|<EXTRA_TOKENS_311>|",
3726
+ "|<EXTRA_TOKENS_312>|",
3727
+ "|<EXTRA_TOKENS_313>|",
3728
+ "|<EXTRA_TOKENS_314>|",
3729
+ "|<EXTRA_TOKENS_315>|",
3730
+ "|<EXTRA_TOKENS_316>|",
3731
+ "|<EXTRA_TOKENS_317>|",
3732
+ "|<EXTRA_TOKENS_318>|",
3733
+ "|<EXTRA_TOKENS_319>|",
3734
+ "|<EXTRA_TOKENS_320>|",
3735
+ "|<EXTRA_TOKENS_321>|",
3736
+ "|<EXTRA_TOKENS_322>|",
3737
+ "|<EXTRA_TOKENS_323>|",
3738
+ "|<EXTRA_TOKENS_324>|",
3739
+ "|<EXTRA_TOKENS_325>|",
3740
+ "|<EXTRA_TOKENS_326>|",
3741
+ "|<EXTRA_TOKENS_327>|",
3742
+ "|<EXTRA_TOKENS_328>|",
3743
+ "|<EXTRA_TOKENS_329>|",
3744
+ "|<EXTRA_TOKENS_330>|",
3745
+ "|<EXTRA_TOKENS_331>|",
3746
+ "|<EXTRA_TOKENS_332>|",
3747
+ "|<EXTRA_TOKENS_333>|",
3748
+ "|<EXTRA_TOKENS_334>|",
3749
+ "|<EXTRA_TOKENS_335>|",
3750
+ "|<EXTRA_TOKENS_336>|",
3751
+ "|<EXTRA_TOKENS_337>|",
3752
+ "|<EXTRA_TOKENS_338>|",
3753
+ "|<EXTRA_TOKENS_339>|",
3754
+ "|<EXTRA_TOKENS_340>|",
3755
+ "|<EXTRA_TOKENS_341>|",
3756
+ "|<EXTRA_TOKENS_342>|",
3757
+ "|<EXTRA_TOKENS_343>|",
3758
+ "|<EXTRA_TOKENS_344>|",
3759
+ "|<EXTRA_TOKENS_345>|",
3760
+ "|<EXTRA_TOKENS_346>|",
3761
+ "|<EXTRA_TOKENS_347>|",
3762
+ "|<EXTRA_TOKENS_348>|",
3763
+ "|<EXTRA_TOKENS_349>|",
3764
+ "|<EXTRA_TOKENS_350>|",
3765
+ "|<EXTRA_TOKENS_351>|",
3766
+ "|<EXTRA_TOKENS_352>|",
3767
+ "|<EXTRA_TOKENS_353>|",
3768
+ "|<EXTRA_TOKENS_354>|",
3769
+ "|<EXTRA_TOKENS_355>|",
3770
+ "|<EXTRA_TOKENS_356>|",
3771
+ "|<EXTRA_TOKENS_357>|",
3772
+ "|<EXTRA_TOKENS_358>|",
3773
+ "|<EXTRA_TOKENS_359>|",
3774
+ "|<EXTRA_TOKENS_360>|",
3775
+ "|<EXTRA_TOKENS_361>|",
3776
+ "|<EXTRA_TOKENS_362>|",
3777
+ "|<EXTRA_TOKENS_363>|",
3778
+ "|<EXTRA_TOKENS_364>|",
3779
+ "|<EXTRA_TOKENS_365>|",
3780
+ "|<EXTRA_TOKENS_366>|",
3781
+ "|<EXTRA_TOKENS_367>|",
3782
+ "|<EXTRA_TOKENS_368>|",
3783
+ "|<EXTRA_TOKENS_369>|",
3784
+ "|<EXTRA_TOKENS_370>|",
3785
+ "|<EXTRA_TOKENS_371>|",
3786
+ "|<EXTRA_TOKENS_372>|",
3787
+ "|<EXTRA_TOKENS_373>|",
3788
+ "|<EXTRA_TOKENS_374>|",
3789
+ "|<EXTRA_TOKENS_375>|",
3790
+ "|<EXTRA_TOKENS_376>|",
3791
+ "|<EXTRA_TOKENS_377>|",
3792
+ "|<EXTRA_TOKENS_378>|",
3793
+ "|<EXTRA_TOKENS_379>|",
3794
+ "|<EXTRA_TOKENS_380>|",
3795
+ "|<EXTRA_TOKENS_381>|",
3796
+ "|<EXTRA_TOKENS_382>|",
3797
+ "|<EXTRA_TOKENS_383>|",
3798
+ "|<EXTRA_TOKENS_384>|",
3799
+ "|<EXTRA_TOKENS_385>|",
3800
+ "|<EXTRA_TOKENS_386>|",
3801
+ "|<EXTRA_TOKENS_387>|",
3802
+ "|<EXTRA_TOKENS_388>|",
3803
+ "|<EXTRA_TOKENS_389>|",
3804
+ "|<EXTRA_TOKENS_390>|",
3805
+ "|<EXTRA_TOKENS_391>|",
3806
+ "|<EXTRA_TOKENS_392>|",
3807
+ "|<EXTRA_TOKENS_393>|",
3808
+ "|<EXTRA_TOKENS_394>|",
3809
+ "|<EXTRA_TOKENS_395>|",
3810
+ "|<EXTRA_TOKENS_396>|",
3811
+ "|<EXTRA_TOKENS_397>|",
3812
+ "|<EXTRA_TOKENS_398>|",
3813
+ "|<EXTRA_TOKENS_399>|",
3814
+ "|<EXTRA_TOKENS_400>|",
3815
+ "|<EXTRA_TOKENS_401>|",
3816
+ "|<EXTRA_TOKENS_402>|",
3817
+ "|<EXTRA_TOKENS_403>|",
3818
+ "|<EXTRA_TOKENS_404>|",
3819
+ "|<EXTRA_TOKENS_405>|",
3820
+ "|<EXTRA_TOKENS_406>|",
3821
+ "|<EXTRA_TOKENS_407>|",
3822
+ "|<EXTRA_TOKENS_408>|",
3823
+ "|<EXTRA_TOKENS_409>|",
3824
+ "|<EXTRA_TOKENS_410>|",
3825
+ "|<EXTRA_TOKENS_411>|",
3826
+ "|<EXTRA_TOKENS_412>|",
3827
+ "|<EXTRA_TOKENS_413>|",
3828
+ "|<EXTRA_TOKENS_414>|",
3829
+ "|<EXTRA_TOKENS_415>|",
3830
+ "|<EXTRA_TOKENS_416>|",
3831
+ "|<EXTRA_TOKENS_417>|",
3832
+ "<im_start>",
3833
+ "<im_end>",
3834
+ "<im_patch>",
3835
+ "<im_col>",
3836
+ "<|image|>"
3837
+ ],
3838
+ "auto_map": {
3839
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
3840
+ },
3841
+ "bos_token": null,
3842
+ "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
3843
+ "clean_up_tokenization_spaces": false,
3844
+ "eos_token": "<|endoftext|>",
3845
+ "errors": "replace",
3846
+ "model_max_length": 32768,
3847
+ "pad_token": "<|endoftext|>",
3848
+ "processor_class": "MolmoProcessor",
3849
+ "split_special_tokens": false,
3850
+ "tokenizer_class": "Qwen2Tokenizer",
3851
+ "torch_dtype": "auto",
3852
+ "unk_token": null
3853
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff