Leon committed on
Commit
be00d5d
·
verified ·
1 Parent(s): d7e0fcd

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,428 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "<im_col>": 152067,
3
+ "<im_end>": 152065,
4
+ "<im_patch>": 152066,
5
+ "<im_start>": 152064,
6
+ "<|endoftext|>": 151643,
7
+ "<|im_end|>": 151645,
8
+ "<|im_start|>": 151644,
9
+ "<|image|>": 152068,
10
+ "|<EXTRA_TOKENS_0>|": 151646,
11
+ "|<EXTRA_TOKENS_100>|": 151746,
12
+ "|<EXTRA_TOKENS_101>|": 151747,
13
+ "|<EXTRA_TOKENS_102>|": 151748,
14
+ "|<EXTRA_TOKENS_103>|": 151749,
15
+ "|<EXTRA_TOKENS_104>|": 151750,
16
+ "|<EXTRA_TOKENS_105>|": 151751,
17
+ "|<EXTRA_TOKENS_106>|": 151752,
18
+ "|<EXTRA_TOKENS_107>|": 151753,
19
+ "|<EXTRA_TOKENS_108>|": 151754,
20
+ "|<EXTRA_TOKENS_109>|": 151755,
21
+ "|<EXTRA_TOKENS_10>|": 151656,
22
+ "|<EXTRA_TOKENS_110>|": 151756,
23
+ "|<EXTRA_TOKENS_111>|": 151757,
24
+ "|<EXTRA_TOKENS_112>|": 151758,
25
+ "|<EXTRA_TOKENS_113>|": 151759,
26
+ "|<EXTRA_TOKENS_114>|": 151760,
27
+ "|<EXTRA_TOKENS_115>|": 151761,
28
+ "|<EXTRA_TOKENS_116>|": 151762,
29
+ "|<EXTRA_TOKENS_117>|": 151763,
30
+ "|<EXTRA_TOKENS_118>|": 151764,
31
+ "|<EXTRA_TOKENS_119>|": 151765,
32
+ "|<EXTRA_TOKENS_11>|": 151657,
33
+ "|<EXTRA_TOKENS_120>|": 151766,
34
+ "|<EXTRA_TOKENS_121>|": 151767,
35
+ "|<EXTRA_TOKENS_122>|": 151768,
36
+ "|<EXTRA_TOKENS_123>|": 151769,
37
+ "|<EXTRA_TOKENS_124>|": 151770,
38
+ "|<EXTRA_TOKENS_125>|": 151771,
39
+ "|<EXTRA_TOKENS_126>|": 151772,
40
+ "|<EXTRA_TOKENS_127>|": 151773,
41
+ "|<EXTRA_TOKENS_128>|": 151774,
42
+ "|<EXTRA_TOKENS_129>|": 151775,
43
+ "|<EXTRA_TOKENS_12>|": 151658,
44
+ "|<EXTRA_TOKENS_130>|": 151776,
45
+ "|<EXTRA_TOKENS_131>|": 151777,
46
+ "|<EXTRA_TOKENS_132>|": 151778,
47
+ "|<EXTRA_TOKENS_133>|": 151779,
48
+ "|<EXTRA_TOKENS_134>|": 151780,
49
+ "|<EXTRA_TOKENS_135>|": 151781,
50
+ "|<EXTRA_TOKENS_136>|": 151782,
51
+ "|<EXTRA_TOKENS_137>|": 151783,
52
+ "|<EXTRA_TOKENS_138>|": 151784,
53
+ "|<EXTRA_TOKENS_139>|": 151785,
54
+ "|<EXTRA_TOKENS_13>|": 151659,
55
+ "|<EXTRA_TOKENS_140>|": 151786,
56
+ "|<EXTRA_TOKENS_141>|": 151787,
57
+ "|<EXTRA_TOKENS_142>|": 151788,
58
+ "|<EXTRA_TOKENS_143>|": 151789,
59
+ "|<EXTRA_TOKENS_144>|": 151790,
60
+ "|<EXTRA_TOKENS_145>|": 151791,
61
+ "|<EXTRA_TOKENS_146>|": 151792,
62
+ "|<EXTRA_TOKENS_147>|": 151793,
63
+ "|<EXTRA_TOKENS_148>|": 151794,
64
+ "|<EXTRA_TOKENS_149>|": 151795,
65
+ "|<EXTRA_TOKENS_14>|": 151660,
66
+ "|<EXTRA_TOKENS_150>|": 151796,
67
+ "|<EXTRA_TOKENS_151>|": 151797,
68
+ "|<EXTRA_TOKENS_152>|": 151798,
69
+ "|<EXTRA_TOKENS_153>|": 151799,
70
+ "|<EXTRA_TOKENS_154>|": 151800,
71
+ "|<EXTRA_TOKENS_155>|": 151801,
72
+ "|<EXTRA_TOKENS_156>|": 151802,
73
+ "|<EXTRA_TOKENS_157>|": 151803,
74
+ "|<EXTRA_TOKENS_158>|": 151804,
75
+ "|<EXTRA_TOKENS_159>|": 151805,
76
+ "|<EXTRA_TOKENS_15>|": 151661,
77
+ "|<EXTRA_TOKENS_160>|": 151806,
78
+ "|<EXTRA_TOKENS_161>|": 151807,
79
+ "|<EXTRA_TOKENS_162>|": 151808,
80
+ "|<EXTRA_TOKENS_163>|": 151809,
81
+ "|<EXTRA_TOKENS_164>|": 151810,
82
+ "|<EXTRA_TOKENS_165>|": 151811,
83
+ "|<EXTRA_TOKENS_166>|": 151812,
84
+ "|<EXTRA_TOKENS_167>|": 151813,
85
+ "|<EXTRA_TOKENS_168>|": 151814,
86
+ "|<EXTRA_TOKENS_169>|": 151815,
87
+ "|<EXTRA_TOKENS_16>|": 151662,
88
+ "|<EXTRA_TOKENS_170>|": 151816,
89
+ "|<EXTRA_TOKENS_171>|": 151817,
90
+ "|<EXTRA_TOKENS_172>|": 151818,
91
+ "|<EXTRA_TOKENS_173>|": 151819,
92
+ "|<EXTRA_TOKENS_174>|": 151820,
93
+ "|<EXTRA_TOKENS_175>|": 151821,
94
+ "|<EXTRA_TOKENS_176>|": 151822,
95
+ "|<EXTRA_TOKENS_177>|": 151823,
96
+ "|<EXTRA_TOKENS_178>|": 151824,
97
+ "|<EXTRA_TOKENS_179>|": 151825,
98
+ "|<EXTRA_TOKENS_17>|": 151663,
99
+ "|<EXTRA_TOKENS_180>|": 151826,
100
+ "|<EXTRA_TOKENS_181>|": 151827,
101
+ "|<EXTRA_TOKENS_182>|": 151828,
102
+ "|<EXTRA_TOKENS_183>|": 151829,
103
+ "|<EXTRA_TOKENS_184>|": 151830,
104
+ "|<EXTRA_TOKENS_185>|": 151831,
105
+ "|<EXTRA_TOKENS_186>|": 151832,
106
+ "|<EXTRA_TOKENS_187>|": 151833,
107
+ "|<EXTRA_TOKENS_188>|": 151834,
108
+ "|<EXTRA_TOKENS_189>|": 151835,
109
+ "|<EXTRA_TOKENS_18>|": 151664,
110
+ "|<EXTRA_TOKENS_190>|": 151836,
111
+ "|<EXTRA_TOKENS_191>|": 151837,
112
+ "|<EXTRA_TOKENS_192>|": 151838,
113
+ "|<EXTRA_TOKENS_193>|": 151839,
114
+ "|<EXTRA_TOKENS_194>|": 151840,
115
+ "|<EXTRA_TOKENS_195>|": 151841,
116
+ "|<EXTRA_TOKENS_196>|": 151842,
117
+ "|<EXTRA_TOKENS_197>|": 151843,
118
+ "|<EXTRA_TOKENS_198>|": 151844,
119
+ "|<EXTRA_TOKENS_199>|": 151845,
120
+ "|<EXTRA_TOKENS_19>|": 151665,
121
+ "|<EXTRA_TOKENS_1>|": 151647,
122
+ "|<EXTRA_TOKENS_200>|": 151846,
123
+ "|<EXTRA_TOKENS_201>|": 151847,
124
+ "|<EXTRA_TOKENS_202>|": 151848,
125
+ "|<EXTRA_TOKENS_203>|": 151849,
126
+ "|<EXTRA_TOKENS_204>|": 151850,
127
+ "|<EXTRA_TOKENS_205>|": 151851,
128
+ "|<EXTRA_TOKENS_206>|": 151852,
129
+ "|<EXTRA_TOKENS_207>|": 151853,
130
+ "|<EXTRA_TOKENS_208>|": 151854,
131
+ "|<EXTRA_TOKENS_209>|": 151855,
132
+ "|<EXTRA_TOKENS_20>|": 151666,
133
+ "|<EXTRA_TOKENS_210>|": 151856,
134
+ "|<EXTRA_TOKENS_211>|": 151857,
135
+ "|<EXTRA_TOKENS_212>|": 151858,
136
+ "|<EXTRA_TOKENS_213>|": 151859,
137
+ "|<EXTRA_TOKENS_214>|": 151860,
138
+ "|<EXTRA_TOKENS_215>|": 151861,
139
+ "|<EXTRA_TOKENS_216>|": 151862,
140
+ "|<EXTRA_TOKENS_217>|": 151863,
141
+ "|<EXTRA_TOKENS_218>|": 151864,
142
+ "|<EXTRA_TOKENS_219>|": 151865,
143
+ "|<EXTRA_TOKENS_21>|": 151667,
144
+ "|<EXTRA_TOKENS_220>|": 151866,
145
+ "|<EXTRA_TOKENS_221>|": 151867,
146
+ "|<EXTRA_TOKENS_222>|": 151868,
147
+ "|<EXTRA_TOKENS_223>|": 151869,
148
+ "|<EXTRA_TOKENS_224>|": 151870,
149
+ "|<EXTRA_TOKENS_225>|": 151871,
150
+ "|<EXTRA_TOKENS_226>|": 151872,
151
+ "|<EXTRA_TOKENS_227>|": 151873,
152
+ "|<EXTRA_TOKENS_228>|": 151874,
153
+ "|<EXTRA_TOKENS_229>|": 151875,
154
+ "|<EXTRA_TOKENS_22>|": 151668,
155
+ "|<EXTRA_TOKENS_230>|": 151876,
156
+ "|<EXTRA_TOKENS_231>|": 151877,
157
+ "|<EXTRA_TOKENS_232>|": 151878,
158
+ "|<EXTRA_TOKENS_233>|": 151879,
159
+ "|<EXTRA_TOKENS_234>|": 151880,
160
+ "|<EXTRA_TOKENS_235>|": 151881,
161
+ "|<EXTRA_TOKENS_236>|": 151882,
162
+ "|<EXTRA_TOKENS_237>|": 151883,
163
+ "|<EXTRA_TOKENS_238>|": 151884,
164
+ "|<EXTRA_TOKENS_239>|": 151885,
165
+ "|<EXTRA_TOKENS_23>|": 151669,
166
+ "|<EXTRA_TOKENS_240>|": 151886,
167
+ "|<EXTRA_TOKENS_241>|": 151887,
168
+ "|<EXTRA_TOKENS_242>|": 151888,
169
+ "|<EXTRA_TOKENS_243>|": 151889,
170
+ "|<EXTRA_TOKENS_244>|": 151890,
171
+ "|<EXTRA_TOKENS_245>|": 151891,
172
+ "|<EXTRA_TOKENS_246>|": 151892,
173
+ "|<EXTRA_TOKENS_247>|": 151893,
174
+ "|<EXTRA_TOKENS_248>|": 151894,
175
+ "|<EXTRA_TOKENS_249>|": 151895,
176
+ "|<EXTRA_TOKENS_24>|": 151670,
177
+ "|<EXTRA_TOKENS_250>|": 151896,
178
+ "|<EXTRA_TOKENS_251>|": 151897,
179
+ "|<EXTRA_TOKENS_252>|": 151898,
180
+ "|<EXTRA_TOKENS_253>|": 151899,
181
+ "|<EXTRA_TOKENS_254>|": 151900,
182
+ "|<EXTRA_TOKENS_255>|": 151901,
183
+ "|<EXTRA_TOKENS_256>|": 151902,
184
+ "|<EXTRA_TOKENS_257>|": 151903,
185
+ "|<EXTRA_TOKENS_258>|": 151904,
186
+ "|<EXTRA_TOKENS_259>|": 151905,
187
+ "|<EXTRA_TOKENS_25>|": 151671,
188
+ "|<EXTRA_TOKENS_260>|": 151906,
189
+ "|<EXTRA_TOKENS_261>|": 151907,
190
+ "|<EXTRA_TOKENS_262>|": 151908,
191
+ "|<EXTRA_TOKENS_263>|": 151909,
192
+ "|<EXTRA_TOKENS_264>|": 151910,
193
+ "|<EXTRA_TOKENS_265>|": 151911,
194
+ "|<EXTRA_TOKENS_266>|": 151912,
195
+ "|<EXTRA_TOKENS_267>|": 151913,
196
+ "|<EXTRA_TOKENS_268>|": 151914,
197
+ "|<EXTRA_TOKENS_269>|": 151915,
198
+ "|<EXTRA_TOKENS_26>|": 151672,
199
+ "|<EXTRA_TOKENS_270>|": 151916,
200
+ "|<EXTRA_TOKENS_271>|": 151917,
201
+ "|<EXTRA_TOKENS_272>|": 151918,
202
+ "|<EXTRA_TOKENS_273>|": 151919,
203
+ "|<EXTRA_TOKENS_274>|": 151920,
204
+ "|<EXTRA_TOKENS_275>|": 151921,
205
+ "|<EXTRA_TOKENS_276>|": 151922,
206
+ "|<EXTRA_TOKENS_277>|": 151923,
207
+ "|<EXTRA_TOKENS_278>|": 151924,
208
+ "|<EXTRA_TOKENS_279>|": 151925,
209
+ "|<EXTRA_TOKENS_27>|": 151673,
210
+ "|<EXTRA_TOKENS_280>|": 151926,
211
+ "|<EXTRA_TOKENS_281>|": 151927,
212
+ "|<EXTRA_TOKENS_282>|": 151928,
213
+ "|<EXTRA_TOKENS_283>|": 151929,
214
+ "|<EXTRA_TOKENS_284>|": 151930,
215
+ "|<EXTRA_TOKENS_285>|": 151931,
216
+ "|<EXTRA_TOKENS_286>|": 151932,
217
+ "|<EXTRA_TOKENS_287>|": 151933,
218
+ "|<EXTRA_TOKENS_288>|": 151934,
219
+ "|<EXTRA_TOKENS_289>|": 151935,
220
+ "|<EXTRA_TOKENS_28>|": 151674,
221
+ "|<EXTRA_TOKENS_290>|": 151936,
222
+ "|<EXTRA_TOKENS_291>|": 151937,
223
+ "|<EXTRA_TOKENS_292>|": 151938,
224
+ "|<EXTRA_TOKENS_293>|": 151939,
225
+ "|<EXTRA_TOKENS_294>|": 151940,
226
+ "|<EXTRA_TOKENS_295>|": 151941,
227
+ "|<EXTRA_TOKENS_296>|": 151942,
228
+ "|<EXTRA_TOKENS_297>|": 151943,
229
+ "|<EXTRA_TOKENS_298>|": 151944,
230
+ "|<EXTRA_TOKENS_299>|": 151945,
231
+ "|<EXTRA_TOKENS_29>|": 151675,
232
+ "|<EXTRA_TOKENS_2>|": 151648,
233
+ "|<EXTRA_TOKENS_300>|": 151946,
234
+ "|<EXTRA_TOKENS_301>|": 151947,
235
+ "|<EXTRA_TOKENS_302>|": 151948,
236
+ "|<EXTRA_TOKENS_303>|": 151949,
237
+ "|<EXTRA_TOKENS_304>|": 151950,
238
+ "|<EXTRA_TOKENS_305>|": 151951,
239
+ "|<EXTRA_TOKENS_306>|": 151952,
240
+ "|<EXTRA_TOKENS_307>|": 151953,
241
+ "|<EXTRA_TOKENS_308>|": 151954,
242
+ "|<EXTRA_TOKENS_309>|": 151955,
243
+ "|<EXTRA_TOKENS_30>|": 151676,
244
+ "|<EXTRA_TOKENS_310>|": 151956,
245
+ "|<EXTRA_TOKENS_311>|": 151957,
246
+ "|<EXTRA_TOKENS_312>|": 151958,
247
+ "|<EXTRA_TOKENS_313>|": 151959,
248
+ "|<EXTRA_TOKENS_314>|": 151960,
249
+ "|<EXTRA_TOKENS_315>|": 151961,
250
+ "|<EXTRA_TOKENS_316>|": 151962,
251
+ "|<EXTRA_TOKENS_317>|": 151963,
252
+ "|<EXTRA_TOKENS_318>|": 151964,
253
+ "|<EXTRA_TOKENS_319>|": 151965,
254
+ "|<EXTRA_TOKENS_31>|": 151677,
255
+ "|<EXTRA_TOKENS_320>|": 151966,
256
+ "|<EXTRA_TOKENS_321>|": 151967,
257
+ "|<EXTRA_TOKENS_322>|": 151968,
258
+ "|<EXTRA_TOKENS_323>|": 151969,
259
+ "|<EXTRA_TOKENS_324>|": 151970,
260
+ "|<EXTRA_TOKENS_325>|": 151971,
261
+ "|<EXTRA_TOKENS_326>|": 151972,
262
+ "|<EXTRA_TOKENS_327>|": 151973,
263
+ "|<EXTRA_TOKENS_328>|": 151974,
264
+ "|<EXTRA_TOKENS_329>|": 151975,
265
+ "|<EXTRA_TOKENS_32>|": 151678,
266
+ "|<EXTRA_TOKENS_330>|": 151976,
267
+ "|<EXTRA_TOKENS_331>|": 151977,
268
+ "|<EXTRA_TOKENS_332>|": 151978,
269
+ "|<EXTRA_TOKENS_333>|": 151979,
270
+ "|<EXTRA_TOKENS_334>|": 151980,
271
+ "|<EXTRA_TOKENS_335>|": 151981,
272
+ "|<EXTRA_TOKENS_336>|": 151982,
273
+ "|<EXTRA_TOKENS_337>|": 151983,
274
+ "|<EXTRA_TOKENS_338>|": 151984,
275
+ "|<EXTRA_TOKENS_339>|": 151985,
276
+ "|<EXTRA_TOKENS_33>|": 151679,
277
+ "|<EXTRA_TOKENS_340>|": 151986,
278
+ "|<EXTRA_TOKENS_341>|": 151987,
279
+ "|<EXTRA_TOKENS_342>|": 151988,
280
+ "|<EXTRA_TOKENS_343>|": 151989,
281
+ "|<EXTRA_TOKENS_344>|": 151990,
282
+ "|<EXTRA_TOKENS_345>|": 151991,
283
+ "|<EXTRA_TOKENS_346>|": 151992,
284
+ "|<EXTRA_TOKENS_347>|": 151993,
285
+ "|<EXTRA_TOKENS_348>|": 151994,
286
+ "|<EXTRA_TOKENS_349>|": 151995,
287
+ "|<EXTRA_TOKENS_34>|": 151680,
288
+ "|<EXTRA_TOKENS_350>|": 151996,
289
+ "|<EXTRA_TOKENS_351>|": 151997,
290
+ "|<EXTRA_TOKENS_352>|": 151998,
291
+ "|<EXTRA_TOKENS_353>|": 151999,
292
+ "|<EXTRA_TOKENS_354>|": 152000,
293
+ "|<EXTRA_TOKENS_355>|": 152001,
294
+ "|<EXTRA_TOKENS_356>|": 152002,
295
+ "|<EXTRA_TOKENS_357>|": 152003,
296
+ "|<EXTRA_TOKENS_358>|": 152004,
297
+ "|<EXTRA_TOKENS_359>|": 152005,
298
+ "|<EXTRA_TOKENS_35>|": 151681,
299
+ "|<EXTRA_TOKENS_360>|": 152006,
300
+ "|<EXTRA_TOKENS_361>|": 152007,
301
+ "|<EXTRA_TOKENS_362>|": 152008,
302
+ "|<EXTRA_TOKENS_363>|": 152009,
303
+ "|<EXTRA_TOKENS_364>|": 152010,
304
+ "|<EXTRA_TOKENS_365>|": 152011,
305
+ "|<EXTRA_TOKENS_366>|": 152012,
306
+ "|<EXTRA_TOKENS_367>|": 152013,
307
+ "|<EXTRA_TOKENS_368>|": 152014,
308
+ "|<EXTRA_TOKENS_369>|": 152015,
309
+ "|<EXTRA_TOKENS_36>|": 151682,
310
+ "|<EXTRA_TOKENS_370>|": 152016,
311
+ "|<EXTRA_TOKENS_371>|": 152017,
312
+ "|<EXTRA_TOKENS_372>|": 152018,
313
+ "|<EXTRA_TOKENS_373>|": 152019,
314
+ "|<EXTRA_TOKENS_374>|": 152020,
315
+ "|<EXTRA_TOKENS_375>|": 152021,
316
+ "|<EXTRA_TOKENS_376>|": 152022,
317
+ "|<EXTRA_TOKENS_377>|": 152023,
318
+ "|<EXTRA_TOKENS_378>|": 152024,
319
+ "|<EXTRA_TOKENS_379>|": 152025,
320
+ "|<EXTRA_TOKENS_37>|": 151683,
321
+ "|<EXTRA_TOKENS_380>|": 152026,
322
+ "|<EXTRA_TOKENS_381>|": 152027,
323
+ "|<EXTRA_TOKENS_382>|": 152028,
324
+ "|<EXTRA_TOKENS_383>|": 152029,
325
+ "|<EXTRA_TOKENS_384>|": 152030,
326
+ "|<EXTRA_TOKENS_385>|": 152031,
327
+ "|<EXTRA_TOKENS_386>|": 152032,
328
+ "|<EXTRA_TOKENS_387>|": 152033,
329
+ "|<EXTRA_TOKENS_388>|": 152034,
330
+ "|<EXTRA_TOKENS_389>|": 152035,
331
+ "|<EXTRA_TOKENS_38>|": 151684,
332
+ "|<EXTRA_TOKENS_390>|": 152036,
333
+ "|<EXTRA_TOKENS_391>|": 152037,
334
+ "|<EXTRA_TOKENS_392>|": 152038,
335
+ "|<EXTRA_TOKENS_393>|": 152039,
336
+ "|<EXTRA_TOKENS_394>|": 152040,
337
+ "|<EXTRA_TOKENS_395>|": 152041,
338
+ "|<EXTRA_TOKENS_396>|": 152042,
339
+ "|<EXTRA_TOKENS_397>|": 152043,
340
+ "|<EXTRA_TOKENS_398>|": 152044,
341
+ "|<EXTRA_TOKENS_399>|": 152045,
342
+ "|<EXTRA_TOKENS_39>|": 151685,
343
+ "|<EXTRA_TOKENS_3>|": 151649,
344
+ "|<EXTRA_TOKENS_400>|": 152046,
345
+ "|<EXTRA_TOKENS_401>|": 152047,
346
+ "|<EXTRA_TOKENS_402>|": 152048,
347
+ "|<EXTRA_TOKENS_403>|": 152049,
348
+ "|<EXTRA_TOKENS_404>|": 152050,
349
+ "|<EXTRA_TOKENS_405>|": 152051,
350
+ "|<EXTRA_TOKENS_406>|": 152052,
351
+ "|<EXTRA_TOKENS_407>|": 152053,
352
+ "|<EXTRA_TOKENS_408>|": 152054,
353
+ "|<EXTRA_TOKENS_409>|": 152055,
354
+ "|<EXTRA_TOKENS_40>|": 151686,
355
+ "|<EXTRA_TOKENS_410>|": 152056,
356
+ "|<EXTRA_TOKENS_411>|": 152057,
357
+ "|<EXTRA_TOKENS_412>|": 152058,
358
+ "|<EXTRA_TOKENS_413>|": 152059,
359
+ "|<EXTRA_TOKENS_414>|": 152060,
360
+ "|<EXTRA_TOKENS_415>|": 152061,
361
+ "|<EXTRA_TOKENS_416>|": 152062,
362
+ "|<EXTRA_TOKENS_417>|": 152063,
363
+ "|<EXTRA_TOKENS_41>|": 151687,
364
+ "|<EXTRA_TOKENS_42>|": 151688,
365
+ "|<EXTRA_TOKENS_43>|": 151689,
366
+ "|<EXTRA_TOKENS_44>|": 151690,
367
+ "|<EXTRA_TOKENS_45>|": 151691,
368
+ "|<EXTRA_TOKENS_46>|": 151692,
369
+ "|<EXTRA_TOKENS_47>|": 151693,
370
+ "|<EXTRA_TOKENS_48>|": 151694,
371
+ "|<EXTRA_TOKENS_49>|": 151695,
372
+ "|<EXTRA_TOKENS_4>|": 151650,
373
+ "|<EXTRA_TOKENS_50>|": 151696,
374
+ "|<EXTRA_TOKENS_51>|": 151697,
375
+ "|<EXTRA_TOKENS_52>|": 151698,
376
+ "|<EXTRA_TOKENS_53>|": 151699,
377
+ "|<EXTRA_TOKENS_54>|": 151700,
378
+ "|<EXTRA_TOKENS_55>|": 151701,
379
+ "|<EXTRA_TOKENS_56>|": 151702,
380
+ "|<EXTRA_TOKENS_57>|": 151703,
381
+ "|<EXTRA_TOKENS_58>|": 151704,
382
+ "|<EXTRA_TOKENS_59>|": 151705,
383
+ "|<EXTRA_TOKENS_5>|": 151651,
384
+ "|<EXTRA_TOKENS_60>|": 151706,
385
+ "|<EXTRA_TOKENS_61>|": 151707,
386
+ "|<EXTRA_TOKENS_62>|": 151708,
387
+ "|<EXTRA_TOKENS_63>|": 151709,
388
+ "|<EXTRA_TOKENS_64>|": 151710,
389
+ "|<EXTRA_TOKENS_65>|": 151711,
390
+ "|<EXTRA_TOKENS_66>|": 151712,
391
+ "|<EXTRA_TOKENS_67>|": 151713,
392
+ "|<EXTRA_TOKENS_68>|": 151714,
393
+ "|<EXTRA_TOKENS_69>|": 151715,
394
+ "|<EXTRA_TOKENS_6>|": 151652,
395
+ "|<EXTRA_TOKENS_70>|": 151716,
396
+ "|<EXTRA_TOKENS_71>|": 151717,
397
+ "|<EXTRA_TOKENS_72>|": 151718,
398
+ "|<EXTRA_TOKENS_73>|": 151719,
399
+ "|<EXTRA_TOKENS_74>|": 151720,
400
+ "|<EXTRA_TOKENS_75>|": 151721,
401
+ "|<EXTRA_TOKENS_76>|": 151722,
402
+ "|<EXTRA_TOKENS_77>|": 151723,
403
+ "|<EXTRA_TOKENS_78>|": 151724,
404
+ "|<EXTRA_TOKENS_79>|": 151725,
405
+ "|<EXTRA_TOKENS_7>|": 151653,
406
+ "|<EXTRA_TOKENS_80>|": 151726,
407
+ "|<EXTRA_TOKENS_81>|": 151727,
408
+ "|<EXTRA_TOKENS_82>|": 151728,
409
+ "|<EXTRA_TOKENS_83>|": 151729,
410
+ "|<EXTRA_TOKENS_84>|": 151730,
411
+ "|<EXTRA_TOKENS_85>|": 151731,
412
+ "|<EXTRA_TOKENS_86>|": 151732,
413
+ "|<EXTRA_TOKENS_87>|": 151733,
414
+ "|<EXTRA_TOKENS_88>|": 151734,
415
+ "|<EXTRA_TOKENS_89>|": 151735,
416
+ "|<EXTRA_TOKENS_8>|": 151654,
417
+ "|<EXTRA_TOKENS_90>|": 151736,
418
+ "|<EXTRA_TOKENS_91>|": 151737,
419
+ "|<EXTRA_TOKENS_92>|": 151738,
420
+ "|<EXTRA_TOKENS_93>|": 151739,
421
+ "|<EXTRA_TOKENS_94>|": 151740,
422
+ "|<EXTRA_TOKENS_95>|": 151741,
423
+ "|<EXTRA_TOKENS_96>|": 151742,
424
+ "|<EXTRA_TOKENS_97>|": 151743,
425
+ "|<EXTRA_TOKENS_98>|": 151744,
426
+ "|<EXTRA_TOKENS_99>|": 151745,
427
+ "|<EXTRA_TOKENS_9>|": 151655
428
+ }
config.json ADDED
@@ -0,0 +1,220 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/root/.cache/huggingface/hub/models--allenai--Molmo-7B-D-0924/snapshots/1721478b71306fb7dc671176d5c204dc7a4d27d7",
3
+ "architectures": [
4
+ "MolmoForCausalLM"
5
+ ],
6
+ "attention_layer_norm": false,
7
+ "auto_map": {
8
+ "AutoConfig": "config_molmo.MolmoConfig",
9
+ "AutoModelForCausalLM": "modeling_molmo.MolmoForCausalLM"
10
+ },
11
+ "clip_qkv": null,
12
+ "embedding_size": 152064,
13
+ "hidden_size": 3584,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 37888,
16
+ "layer_norm_eps": 1e-06,
17
+ "layer_norm_type": "rms",
18
+ "max_position_embeddings": 4096,
19
+ "model_type": "molmo",
20
+ "norm_after": false,
21
+ "num_attention_heads": 28,
22
+ "num_hidden_layers": 28,
23
+ "num_key_value_heads": 4,
24
+ "qkv_bias": true,
25
+ "quantization_config": {
26
+ "config_groups": {
27
+ "group_0": {
28
+ "input_activations": {
29
+ "actorder": null,
30
+ "block_structure": null,
31
+ "dynamic": true,
32
+ "group_size": null,
33
+ "num_bits": 8,
34
+ "observer": null,
35
+ "observer_kwargs": {},
36
+ "strategy": "token",
37
+ "symmetric": true,
38
+ "type": "float"
39
+ },
40
+ "output_activations": null,
41
+ "targets": [
42
+ "Linear"
43
+ ],
44
+ "weights": {
45
+ "actorder": null,
46
+ "block_structure": null,
47
+ "dynamic": false,
48
+ "group_size": null,
49
+ "num_bits": 8,
50
+ "observer": "minmax",
51
+ "observer_kwargs": {},
52
+ "strategy": "channel",
53
+ "symmetric": true,
54
+ "type": "float"
55
+ }
56
+ }
57
+ },
58
+ "format": "float-quantized",
59
+ "global_compression_ratio": 1.2045747559103401,
60
+ "ignore": [
61
+ "model.vision_backbone.image_vit.patch_embedding",
62
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq",
63
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk",
64
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv",
65
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo",
66
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1",
67
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2",
68
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq",
69
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk",
70
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv",
71
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo",
72
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1",
73
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2",
74
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq",
75
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk",
76
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv",
77
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo",
78
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1",
79
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2",
80
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq",
81
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk",
82
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv",
83
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo",
84
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1",
85
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2",
86
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq",
87
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk",
88
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv",
89
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo",
90
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1",
91
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2",
92
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq",
93
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk",
94
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv",
95
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo",
96
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1",
97
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2",
98
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq",
99
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk",
100
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv",
101
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo",
102
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1",
103
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2",
104
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq",
105
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk",
106
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv",
107
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo",
108
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1",
109
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2",
110
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq",
111
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk",
112
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv",
113
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo",
114
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1",
115
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2",
116
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq",
117
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk",
118
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv",
119
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo",
120
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1",
121
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2",
122
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq",
123
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk",
124
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv",
125
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo",
126
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1",
127
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2",
128
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq",
129
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk",
130
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv",
131
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo",
132
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1",
133
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2",
134
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq",
135
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk",
136
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv",
137
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo",
138
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1",
139
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2",
140
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq",
141
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk",
142
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv",
143
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo",
144
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1",
145
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2",
146
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq",
147
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk",
148
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv",
149
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo",
150
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1",
151
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2",
152
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq",
153
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk",
154
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv",
155
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo",
156
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1",
157
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2",
158
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq",
159
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk",
160
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv",
161
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo",
162
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1",
163
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2",
164
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq",
165
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk",
166
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv",
167
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo",
168
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1",
169
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2",
170
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq",
171
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk",
172
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv",
173
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo",
174
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1",
175
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2",
176
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq",
177
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk",
178
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv",
179
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo",
180
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1",
181
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2",
182
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq",
183
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk",
184
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv",
185
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo",
186
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1",
187
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2",
188
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq",
189
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk",
190
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv",
191
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo",
192
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1",
193
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2",
194
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq",
195
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk",
196
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv",
197
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo",
198
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1",
199
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2",
200
+ "model.vision_backbone.image_pooling_2d.wq",
201
+ "model.vision_backbone.image_pooling_2d.wk",
202
+ "model.vision_backbone.image_pooling_2d.wv",
203
+ "model.vision_backbone.image_pooling_2d.wo",
204
+ "model.vision_backbone.image_projector.w1",
205
+ "model.vision_backbone.image_projector.w2",
206
+ "model.vision_backbone.image_projector.w3"
207
+ ],
208
+ "kv_cache_scheme": null,
209
+ "quant_method": "compressed-tensors",
210
+ "quantization_status": "compressed"
211
+ },
212
+ "rope_theta": 1000000.0,
213
+ "tie_word_embeddings": false,
214
+ "torch_dtype": "float32",
215
+ "transformers_version": "4.46.1",
216
+ "use_cache": true,
217
+ "use_position_ids": true,
218
+ "vocab_size": 152064,
219
+ "weight_tying": false
220
+ }
config_molmo.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from transformers import PretrainedConfig, AutoTokenizer
4
+
5
+
6
class MolmoConfig(PretrainedConfig):
    """Configuration object for Molmo models.

    Every constructor argument is stored verbatim as an instance attribute of
    the same name. ``tie_word_embeddings`` and any additional keyword arguments
    are also forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "molmo"
    keys_to_ignore_at_inference = ["past_key_values"]

    def __init__(
        self,
        vocab_size=50304,
        embedding_size=50304,
        hidden_size=4096,
        intermediate_size=11008,
        num_hidden_layers=32,
        num_attention_heads=32,
        num_key_value_heads=None,
        max_position_embeddings=2048,
        initializer_range=0.02,
        use_cache=True,
        layer_norm_eps: float = 1e-5,
        rope_theta=10000.0,
        clip_qkv=None,
        qkv_bias: bool = False,
        weight_tying: bool = False,
        use_position_ids: bool = True,
        tie_word_embeddings: bool = True,
        attention_layer_norm: bool = False,
        norm_after: bool = False,
        layer_norm_type: str = "rms",
        **kwargs,
    ):
        # (attribute name, value) pairs, listed in the order in which they are
        # assigned on the instance; all of them are set before the base-class
        # initializer runs.
        hyperparameters = (
            ("vocab_size", vocab_size),
            ("embedding_size", embedding_size),
            ("max_position_embeddings", max_position_embeddings),
            ("hidden_size", hidden_size),
            ("intermediate_size", intermediate_size),
            ("num_hidden_layers", num_hidden_layers),
            ("num_attention_heads", num_attention_heads),
            ("layer_norm_eps", layer_norm_eps),
            ("weight_tying", weight_tying),
            ("use_position_ids", use_position_ids),
            ("attention_layer_norm", attention_layer_norm),
            ("num_key_value_heads", num_key_value_heads),
            ("initializer_range", initializer_range),
            ("use_cache", use_cache),
            ("rope_theta", rope_theta),
            ("clip_qkv", clip_qkv),
            ("qkv_bias", qkv_bias),
            ("norm_after", norm_after),
            ("tie_word_embeddings", tie_word_embeddings),
            ("layer_norm_type", layer_norm_type),
        )
        for attribute, value in hyperparameters:
            setattr(self, attribute, value)

        # The base class receives tie_word_embeddings explicitly, together with
        # whatever extra keyword arguments the caller supplied.
        super().__init__(tie_word_embeddings=tie_word_embeddings, **kwargs)


# Invoke the auto-class registration hook inherited from PretrainedConfig.
MolmoConfig.register_for_auto_class()
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.46.1"
4
+ }
image_preprocessing_molmo.py ADDED
@@ -0,0 +1,546 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Image processor class for Molmo"""
2
+ from typing import List, Optional, Union, Mapping
3
+
4
+ import numpy as np
5
+ import einops
6
+ import torch
7
+ import torchvision.transforms
8
+ from torchvision.transforms import InterpolationMode
9
+ from torchvision.transforms.functional import convert_image_dtype
10
+
11
+ from transformers.image_utils import (
12
+ OPENAI_CLIP_MEAN,
13
+ OPENAI_CLIP_STD,
14
+ ImageInput,
15
+ is_valid_image,
16
+ )
17
+ from transformers.processing_utils import ImagesKwargs
18
+ from transformers.image_processing_utils import BaseImageProcessor
19
+ from transformers.utils import logging
20
+
21
+
22
+ logger = logging.get_logger(__name__)
23
+
24
+
25
def pad_to_bounding_box(
    image, offset_height, offset_width, target_height,
    target_width, value=0
):
    """Place a 3-D `image` inside a `target_height` x `target_width` canvas.

    The image's top-left corner lands at (`offset_height`, `offset_width`);
    everything around it is filled with `value`. The last axis is never padded.
    """
    rows, cols = image.shape[:2]
    pad_bottom = target_height - offset_height - rows
    pad_right = target_width - offset_width - cols
    pad_spec = [
        [offset_height, pad_bottom],
        [offset_width, pad_right],
        [0, 0],
    ]
    return np.pad(image, pad_spec, constant_values=value)
37
+
38
+
39
def normalize_image(image, offset, scale):
    """Subtract a per-channel `offset` and divide by a per-channel `scale`.

    `offset` and `scale` are converted to float32 and broadcast over the first
    two axes of `image`. The arithmetic is done in place, so the array that is
    passed in is modified and is also the array that is returned.
    """
    shift = np.array(offset, dtype=np.float32)[None, None, :]
    divisor = np.array(scale, dtype=np.float32)[None, None, :]
    image -= shift
    image /= divisor
    return image
43
+
44
+
45
def resize_and_pad(
    image,
    desired_output_size,
    resize_method="torch-bilinear",
    pad_value=0,
    normalize=True,
    image_mean=OPENAI_CLIP_MEAN,
    image_std=OPENAI_CLIP_STD,
):
    """Resize `image` to fit inside `desired_output_size`, keeping its aspect ratio, then pad.

    Args:
        image: array indexed as [height, width, channel].
        desired_output_size: (height, width) of the returned image.
        resize_method: "torch-bilinear" (default) or "tensorflow".
        pad_value: value used for the padded border (applied before normalisation).
        normalize: if True, apply `normalize_image` with `image_mean` / `image_std`.
        image_mean: per-channel offset used when `normalize` is True.
        image_std: per-channel scale used when `normalize` is True.

    Returns:
        (image, image_mask): the resized image centred in the padded canvas, and a
        boolean [height, width] mask that is True where the canvas holds image
        content and False where it holds padding.

    Raises:
        NotImplementedError: if `resize_method` is not one of the supported names.
    """
    desired_height, desired_width = desired_output_size
    height, width = image.shape[:2]

    # Cast into float32 since the training code did this in float32 and it (very rarely) affects
    # the results after rounding.
    image_scale_y = np.array(desired_height, np.float32) / np.array(height, np.float32)
    image_scale_x = np.array(desired_width, np.float32) / np.array(width, np.float32)
    image_scale = min(image_scale_x, image_scale_y)
    # int() truncates, so a very elongated image could end up with a 0-pixel side, which the
    # resize below cannot produce; keep at least one pixel. Larger values are unaffected.
    scaled_height = max(1, int(np.array(height, np.float32) * image_scale))
    scaled_width = max(1, int(np.array(width, np.float32) * image_scale))

    if resize_method == "tensorflow":
        # This is how the original training code did resizing, it can produce slightly different
        # results than using torch resize so we keep it just in case
        import tensorflow as tf
        image = tf.image.convert_image_dtype(tf.constant(image), dtype=tf.float32)
        image = tf.image.resize(
            image,
            [scaled_height, scaled_width],
            method=tf.image.ResizeMethod.BILINEAR,
            antialias=True,
        )
        image = tf.clip_by_value(image, 0.0, 1.0)
        image = image.numpy()
    elif resize_method == "torch-bilinear":
        image = torch.permute(torch.from_numpy(image), [2, 0, 1])
        image = convert_image_dtype(image)  # resize in float32 to match the training code
        image = torchvision.transforms.Resize(
            [scaled_height, scaled_width], InterpolationMode.BILINEAR, antialias=True
        )(image)
        image = torch.clip(image, 0.0, 1.0)
        image = torch.permute(image, [1, 2, 0]).numpy()
    else:
        raise NotImplementedError(resize_method)

    # Centre the resized image; any odd leftover pixel goes to the bottom / right
    top_pad = (desired_height - scaled_height) // 2
    left_pad = (desired_width - scaled_width) // 2
    padding = [
        [top_pad, desired_height - scaled_height - top_pad],
        [left_pad, desired_width - scaled_width - left_pad],
        [0, 0]
    ]
    # The mask is padded with False (np.pad's default constant of 0)
    image_mask = np.pad(np.ones_like(image[:, :, 0], dtype=bool), padding[:2])
    image = np.pad(image, padding, constant_values=pad_value)
    if normalize:
        image = normalize_image(image, offset=image_mean, scale=image_std)
    return image, image_mask
101
+
102
+
103
def select_tiling(h, w, patch_size, max_num_patches):
    """Decide how best to divide an image of size [h, w] into up to max_num_patches tiles of size patch_size.

    Args:
        h: image height.
        w: image width.
        patch_size: side length of one (square) tile.
        max_num_patches: maximum number of tiles (rows * columns) allowed.

    Returns:
        int32 array `[rows, cols]` with the chosen tiling.

    NOTE(review): non-positive `h` or `w` is not validated here and gives
    negative or infinite scales; confirm callers never pass such sizes.
    """
    # Every (rows, cols) grid whose tile count fits the budget
    tilings = [
        (i, j)
        for i in range(1, max_num_patches + 1)
        for j in range(1, max_num_patches // i + 1)
    ]
    # sort so argmin and argmax favour smaller tilings in the event of a tie
    tilings.sort(key=lambda x: (x[0]*x[1], x[0]))
    candidate_tilings = np.array(tilings, dtype=np.int32)  # [n_resolutions, 2]
    candidate_resolutions = candidate_tilings * patch_size  # [n_resolutions, 2]

    # How much we would need to scale the image to fit exactly in each tiling.
    # np.array is used rather than np.stack(..., dtype=...) because the `dtype`
    # argument of np.stack is only available in NumPy >= 1.24.
    original_size = np.array([h, w], dtype=np.float32)  # [2]
    required_scale_d = candidate_resolutions.astype(np.float32) / original_size
    required_scale = np.min(required_scale_d, axis=-1, keepdims=True)  # [n_resolutions, 1]
    if np.all(required_scale < 1):
        # We are forced to downscale, so try to minimize the amount of downscaling
        ix = np.argmax(required_scale)
    else:
        # Pick the resolution that required the least upscaling so that it most closely fits the image
        required_scale = np.where(required_scale < 1.0, 10e9, required_scale)
        ix = np.argmin(required_scale)
    return candidate_tilings[ix]
129
+
130
+
131
class MolmoImagesKwargs(ImagesKwargs, total=False):
    """Optional keyword overrides understood by `MolmoImageProcessor`.

    All keys are optional (`total=False`); each mirrors a constructor argument
    of `MolmoImageProcessor` with the same name.
    """
    # Upper bound on the number of crops an image may be tiled into
    max_crops: Optional[int]
    # (left/top, right/bottom) overlap between neighbouring crops, counted in patches
    overlap_margins: Optional[List[int]]
    # (height, width) in pixels of each crop fed to the vision encoder
    base_image_input_size: Optional[List[int]]
    # Number of tokens per crop along the width, after pooling
    image_token_length_w: Optional[int]
    # Number of tokens per crop along the height, after pooling
    image_token_length_h: Optional[int]
    # Side length in pixels of one square patch
    image_patch_size: Optional[int]
    # Whether `multimodal_preprocess` should also return "image_masks"
    image_padding_mask: Optional[bool]
139
+
140
+
141
class MolmoImageProcessor(BaseImageProcessor):
    """Preprocess images and multi-modal inputs"""

    def __init__(
        self,
        max_crops: int = 12,
        overlap_margins: List[int] = (4, 4),
        base_image_input_size: List[int] = (336, 336),
        image_token_length_w: int = 12,
        image_token_length_h: int = 12,
        image_patch_size: int = 14,
        image_padding_mask: bool = True,
        do_normalize: bool = True,
        image_mean: Optional[Union[float, List[float]]] = None,
        image_std: Optional[Union[float, List[float]]] = None,
        **kwargs,
    ):
        """Store the default preprocessing parameters.

        `image_mean` / `image_std` fall back to the OpenAI CLIP statistics when
        left as None. Remaining keyword arguments go to `BaseImageProcessor`.
        """
        super().__init__(**kwargs)
        self.max_crops = max_crops
        self.overlap_margins = overlap_margins
        self.base_image_input_size = base_image_input_size
        self.image_token_length_w = image_token_length_w
        self.image_token_length_h = image_token_length_h
        self.image_patch_size = image_patch_size
        self.image_padding_mask = image_padding_mask
        self.do_normalize = do_normalize
        self.image_mean = image_mean if image_mean is not None else OPENAI_CLIP_MEAN
        self.image_std = image_std if image_std is not None else OPENAI_CLIP_STD

    def image_to_patches_and_tokens(
        self,
        image: ImageInput,
        image_patch_token_id: int,
        image_col_token_id: int,
        image_start_token_id: int,
        image_end_token_id: int,
        max_crops: Optional[int] = None,
        overlap_margins: Optional[List[int]] = None,
        base_image_input_size: Optional[Union[int, List[int]]] = None,
        image_token_length_w: Optional[int] = None,
        image_token_length_h: Optional[int] = None,
        image_patch_size: Optional[int] = None,
    ):
        """Split an image into overlapping crops plus one global view, and build its token sequence.

        The optional arguments are used as given (no fallback to the instance
        defaults happens here; `preprocess` resolves them before calling).

        Returns:
            patches: [1 + n_crops, n_patches, pixels_per_patch]; the resized global
                image comes first, followed by the crops.
            joint: 1-D token ids: start/patch/column/end tokens for the global image
                followed by the same structure for the high-resolution crops.
            patch_ordering: 1-D array giving, per pooled patch, its position in the
                token order, or -1 for patches that must be dropped.
            img_mask: per-patch fraction of real image pixels for each crop, with one
                extra row of -1 appended at the end of the crop axis.
                NOTE(review): the global image is first in `patches` but its -1 row
                is last here; left untouched, confirm this is what the model expects.
        """
        if isinstance(base_image_input_size, int):
            base_image_input_size = (base_image_input_size, base_image_input_size)

        base_image_input_d = image_patch_size
        tokens_per_image = image_token_length_w * image_token_length_h
        image_base_patch_w = base_image_input_size[1] // base_image_input_d
        image_base_patch_h = base_image_input_size[0] // base_image_input_d

        original_image_h, original_image_w = image.shape[:2]
        crop_size = base_image_input_size[0]

        # Honour the normalisation settings configured on this processor instead of
        # silently relying on resize_and_pad's defaults.
        resize_kwargs = dict(
            normalize=self.do_normalize,
            image_mean=self.image_mean,
            image_std=self.image_std,
        )

        # Discard this many patches from the (left/top, right/bottom) of crops
        left_margin, right_margin = overlap_margins
        assert left_margin % 2 == 0  # Required for compatibility with 2x2 pooling
        total_margin_pixels = base_image_input_d*(right_margin + left_margin)  # pixels removed per dim
        crop_patches = base_image_input_size[0] // base_image_input_d  # patches per crop dim
        crop_window_patches = crop_patches - (right_margin + left_margin)  # usable patches
        crop_window_size = crop_window_patches * base_image_input_d
        tiling = select_tiling(
            original_image_h - total_margin_pixels,
            original_image_w - total_margin_pixels,
            crop_window_size,
            max_crops
        )
        src, img_mask = resize_and_pad(
            image,
            [tiling[0]*crop_window_size+total_margin_pixels, tiling[1]*crop_window_size+total_margin_pixels],
            **resize_kwargs,
        )

        # Now we have to split the image into crops, while keeping track of how each patch in
        # each crop should be ordered in the global image; this requires a lot of tricky bookkeeping
        patches_arr = []
        mask_arr = []
        patch_ordering_arr = []

        # We assume 2x2 pooling, but can allow padding the right/bottom with extra
        # patches if the number of patches per side is not even
        assert (crop_patches+1)//2 == image_token_length_h
        assert (crop_patches+1)//2 == image_token_length_w
        on = 0  # running count of pooled patches assigned so far
        for i in range(tiling[0]):
            y0 = i*crop_window_size
            if i == 0:
                crop_y0 = 0
            else:
                crop_y0 = left_margin // 2

            # Edge crops keep the margin on their outer side
            crop_h = image_base_patch_h - (right_margin + left_margin)
            if i == 0:
                crop_h += left_margin
            if i == (tiling[0]-1):
                crop_h += right_margin
            for j in range(tiling[1]):
                x0 = j*crop_window_size
                if j == 0:
                    crop_x0 = 0
                else:
                    crop_x0 = left_margin // 2

                crop_w = image_base_patch_w - (right_margin + left_margin)
                if j == 0:
                    crop_w += left_margin
                if j == (tiling[1]-1):
                    crop_w += right_margin

                pooled_w = (crop_w + 1) // 2
                pooled_h = (crop_h + 1) // 2
                patch_ordering_arr.append(
                    pad_to_bounding_box(
                        np.reshape(np.arange(on, on+pooled_h*pooled_w, dtype=np.int32), (pooled_h, pooled_w, 1)),
                        crop_y0, crop_x0, image_token_length_h, image_token_length_w, value=-1
                    )[:, :, 0]
                )
                patches_arr.append(src[y0:y0+crop_size, x0:x0+crop_size])
                mask_arr.append(img_mask[y0:y0+crop_size, x0:x0+crop_size])

                on += pooled_h*pooled_w
        patches = np.stack(patches_arr)
        patch_ordering = np.stack(patch_ordering_arr)
        img_mask = np.stack(mask_arr)

        # Switch to [n_crops, n_patches, pixels_per_patch] format
        patches = einops.rearrange(
            patches, 'p (h dh) (w dw) c -> p (h w) (dh dw c)',
            dh=base_image_input_d,
            dw=base_image_input_d,
            h=image_base_patch_h,
            w=image_base_patch_w
        )
        img_mask = einops.rearrange(
            img_mask, 'p (h dh) (w dw) -> p (h w) (dh dw)',
            dh=base_image_input_d,
            dw=base_image_input_d,
            h=image_base_patch_h,
            w=image_base_patch_w
        )

        # Fraction of each patch that is real image (not padding)
        img_mask = img_mask.astype(np.float32).mean(axis=-1)
        patch_ordering = np.reshape(patch_ordering, [-1])
        valid = patch_ordering >= 0

        # Transpose order, to get left-to-right order instead of crop-by-crop order
        patch_ordering_rh = np.reshape(
            patch_ordering,
            [tiling[0], tiling[1], image_token_length_h, image_token_length_w]
        )
        patch_ordering_rh = np.transpose(patch_ordering_rh, [0, 2, 1, 3])
        patch_ordering_rh = np.reshape(patch_ordering_rh, [-1])

        # The transpose will screw up which patches are masked, project the
        # new order into sparse structure of `patch_ordering` to fix this
        patch_ordering[valid] = patch_ordering_rh[patch_ordering_rh >= 0]

        # Now build the output tokens
        h = tiling[0] * crop_window_patches + (right_margin+left_margin)
        w = tiling[1] * crop_window_patches + (right_margin+left_margin)
        per_row = np.full(
            ((w+1)//2,),
            image_patch_token_id,
        )
        per_row = np.concatenate([per_row, [image_col_token_id]], 0)

        joint = np.tile(per_row, [(h+1)//2])
        joint = [
            [image_start_token_id],
            joint,
            [image_end_token_id]
        ]

        # Finally do the same for the global image
        resized, _ = resize_and_pad(image, base_image_input_size, **resize_kwargs)
        resized = einops.rearrange(
            resized, '(h dh) (w dw) c -> (h w) (dh dw c)',
            dh=base_image_input_d,
            dw=base_image_input_d,
            h=image_base_patch_h,
            w=image_base_patch_w
        )
        patches = np.concatenate([np.expand_dims(resized, 0), patches], 0)

        # Global image goes first, so the order of patches in previous crops gets increased
        patch_ordering = np.where(
            patch_ordering >= 0,
            patch_ordering + tokens_per_image,
            -1
        )
        patch_ordering = np.concatenate([np.arange(0, tokens_per_image), patch_ordering], 0)
        per_row = np.full(
            (image_token_length_w,),
            image_patch_token_id,
        )
        per_row = np.concatenate([per_row, [image_col_token_id]], 0)
        extra_tokens = np.tile(per_row, [image_token_length_h])
        joint = [
            [image_start_token_id],
            extra_tokens,
            [image_end_token_id],
        ] + joint

        joint = np.concatenate(joint, 0)
        img_mask = np.pad(img_mask, [[0, 1], [0, 0]], constant_values=-1)
        return patches, joint, patch_ordering, img_mask

    def build_image_input_idx(
        self,
        image_tokens: np.ndarray,
        patch_order: np.ndarray,
        image_patch_token_id: int,
        no_image: Optional[bool] = None,
        image_token_length_w: Optional[int] = None,
        image_token_length_h: Optional[int] = None,
    ):
        """Converts `patch_order` into a mapping of token_id -> patch_id

        Returns an int array reshaped to [-1, tokens_per_image] whose entries are
        positions inside `image_tokens`, or -100 for patches that must be skipped.
        When `no_image` is truthy an empty (0, tokens_per_image) array is returned;
        when `patch_order` is None the flat positions of the patch tokens are
        returned without reordering or reshaping.
        """
        tokens_per_image = image_token_length_w * image_token_length_h
        if no_image is not None and no_image:
            return np.zeros((0, tokens_per_image), np.int32)

        # Indices to insert the patches
        image_input_idx = image_tokens == image_patch_token_id
        image_input_idx = np.nonzero(image_input_idx)[0].astype(np.int32)

        if patch_order is not None:
            n_tokens = image_input_idx.shape[0]
            patch_order = np.reshape(patch_order, [-1])

            valid = patch_order >= 0
            n_valid_patches = valid.sum()
            assert len(image_input_idx) == n_valid_patches

            # Invert the permutation: sorted_patch_ixs[k] is the valid patch shown k-th
            sorted_patch_ixs = np.zeros([n_tokens], np.int32)
            sorted_patch_ixs[patch_order[valid]] = np.arange(n_valid_patches, dtype=np.int32)

            # Project the inverted mapping into same sparse structure
            sorted_patch_ixs_ex = np.full(np.shape(patch_order), -1)
            sorted_patch_ixs_ex[valid] = sorted_patch_ixs

            # Do the gather and then re-mask outputs that were masked in `sorted_patch_ixs`
            valid = (sorted_patch_ixs_ex >= 0).astype(np.int32)
            image_input_idx = image_input_idx[sorted_patch_ixs_ex*valid]
            image_input_idx = image_input_idx*valid - 100*(1 - valid)
            image_input_idx = np.reshape(image_input_idx, [-1, tokens_per_image])
        return image_input_idx

    def preprocess(
        self,
        image: np.ndarray,
        image_patch_token_id: int,
        image_col_token_id: int,
        image_start_token_id: int,
        image_end_token_id: int,
        max_crops: Optional[int] = None,
        overlap_margins: Optional[List[int]] = None,
        base_image_input_size: Optional[Union[int, List[int]]] = None,
        image_token_length_w: Optional[int] = None,
        image_token_length_h: Optional[int] = None,
        image_patch_size: Optional[int] = None,
        **kwargs,
    ):
        """Preprocesses an image

        Any optional argument left as None (or otherwise falsy) falls back to the
        value stored on the processor. Extra keyword arguments are ignored.

        Returns:
            crops: (n_crops, n_patches, patch_dim) individual crops, `n_crops` might
                   change between images but the other dimensions are fixed
            tokens: (n_tokens,) tokens, patch tokens indicate where to insert the
                    patch features, might include other special tokens as well
            image_idx: (n_crops, n_patches) index in `tokens` to put the patch features from the
                       crops after pooling, negative values indicate patch features to exclude
            padding_mask: (n_crops, n_patches) what fraction of each patch is real image
        """
        max_crops = max_crops or self.max_crops
        overlap_margins = overlap_margins or self.overlap_margins
        base_image_input_size = base_image_input_size or self.base_image_input_size
        image_token_length_w = image_token_length_w or self.image_token_length_w
        image_token_length_h = image_token_length_h or self.image_token_length_h
        image_patch_size = image_patch_size or self.image_patch_size

        crops, image_tokens, patch_ordering, img_mask = self.image_to_patches_and_tokens(
            image,
            image_patch_token_id,
            image_col_token_id,
            image_start_token_id,
            image_end_token_id,
            max_crops,
            overlap_margins,
            base_image_input_size,
            image_token_length_w,
            image_token_length_h,
            image_patch_size,
        )
        patch_idx = self.build_image_input_idx(
            image_tokens,
            patch_ordering,
            image_patch_token_id,
            image_token_length_w=image_token_length_w,
            image_token_length_h=image_token_length_h,
        )
        return crops, image_tokens, patch_idx, img_mask

    def multimodal_preprocess(
        self,
        images: np.ndarray,
        tokens: List[int],
        image_idx: np.ndarray,
        sequence_length: int,
        image_patch_token_id: int,
        image_col_token_id: int,
        image_start_token_id: int,
        image_end_token_id: int,
        **kwargs,
    ):
        """Merge images and text tokens into multi-modal features for the model

        :param images: images to use as input, or None / empty for text-only input
        :param tokens: input text tokens
        :param image_idx: where to insert the images into `tokens`; -1 means "at the
            very start", otherwise the token at that position is replaced by the image
        :param sequence_length: accepted for interface compatibility; not used here
        :param image_patch_token_id: id to use for tokens that will contain image features
        :param image_col_token_id: token id for image column special tokens
        :param image_start_token_id: token id for image start special tokens
        :param image_end_token_id: token id for image end special tokens
        :param kwargs: override preprocessor default args
        :return: dict with "input_ids", and when images are given also "images",
            "image_input_idx" and (if the padding mask is enabled) "image_masks"
        """
        # An explicit `image_padding_mask=False` must win over the instance default,
        # so test for None rather than truthiness.
        image_padding_mask = kwargs.get("image_padding_mask")
        if image_padding_mask is None:
            image_padding_mask = self.image_padding_mask

        if images is None or len(images) == 0:
            return {
                "input_ids": tokens,
            }

        # Slicing a plain list can yield an empty list, which np.concatenate would
        # treat as float64 and thereby turn every token id into a float.
        if not isinstance(tokens, np.ndarray):
            tokens = np.asarray(tokens, dtype=np.int64)

        n = len(images)
        all_crops = []
        all_image_idx = []
        out_tokens = []
        all_crop_masks = []
        position = 0  # length of the output sequence assembled so far

        for ix in range(n):
            token_ix = image_idx[ix]
            crops, image_tokens, patch_idx, img_mask = self.preprocess(
                images[ix],
                image_patch_token_id,
                image_col_token_id,
                image_start_token_id,
                image_end_token_id,
                **kwargs,
            )

            if token_ix == -1:  # -1 is an image inserted at the very start
                start = 0
                token_ix = 0
                end = 0
            else:
                start = 0 if ix == 0 else image_idx[ix-1] + 1
                end = token_ix + 1

            text_segment = tokens[start:token_ix]
            out_tokens.append(text_segment)
            position += len(text_segment)

            # Shift only real indices, by where this image's tokens actually start in
            # the output; negative entries are "skip" sentinels and must stay negative.
            all_image_idx.append(np.where(patch_idx >= 0, patch_idx + position, patch_idx))
            all_crops.append(crops)
            out_tokens.append(image_tokens)
            position += len(image_tokens)
            if ix == (n - 1):
                out_tokens.append(tokens[end:])
            if image_padding_mask:
                all_crop_masks.append(img_mask)

        out = {
            "input_ids": np.concatenate(out_tokens, 0),
            "images": np.concatenate(all_crops, 0),
            "image_input_idx": np.concatenate(all_image_idx, 0)
        }
        if image_padding_mask:
            out["image_masks"] = np.concatenate(all_crop_masks, 0)
        return out


# Register the class so it can be resolved through the transformers auto-class machinery
MolmoImageProcessor.register_for_auto_class()
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26c34077de814c9c18c284343b1b0395eaf02d1375c2cc466763bc0c7f2333b0
3
+ size 4994215536
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:835f5d444e0552385e4af37514bcdcf7de198c8d47739ef1695a45452b97cc85
3
+ size 4992452896
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:280aac20b91a7085ea58ac37794c1cd76462b0fdc987bafa8a2a69ce500b37fd
3
+ size 892838928
model.safetensors.index.json ADDED
@@ -0,0 +1,705 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "metadata": {
3
+ "total_size": 10879414272
4
+ },
5
+ "weight_map": {
6
+ "model.transformer.blocks.0.att_proj.bias": "model-00001-of-00003.safetensors",
7
+ "model.transformer.blocks.0.att_proj.weight": "model-00001-of-00003.safetensors",
8
+ "model.transformer.blocks.0.att_proj.weight_scale": "model-00001-of-00003.safetensors",
9
+ "model.transformer.blocks.0.attn_norm.weight": "model-00001-of-00003.safetensors",
10
+ "model.transformer.blocks.0.attn_out.weight": "model-00001-of-00003.safetensors",
11
+ "model.transformer.blocks.0.attn_out.weight_scale": "model-00001-of-00003.safetensors",
12
+ "model.transformer.blocks.0.ff_norm.weight": "model-00001-of-00003.safetensors",
13
+ "model.transformer.blocks.0.ff_out.weight": "model-00001-of-00003.safetensors",
14
+ "model.transformer.blocks.0.ff_out.weight_scale": "model-00001-of-00003.safetensors",
15
+ "model.transformer.blocks.0.ff_proj.weight": "model-00001-of-00003.safetensors",
16
+ "model.transformer.blocks.0.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
17
+ "model.transformer.blocks.1.att_proj.bias": "model-00001-of-00003.safetensors",
18
+ "model.transformer.blocks.1.att_proj.weight": "model-00001-of-00003.safetensors",
19
+ "model.transformer.blocks.1.att_proj.weight_scale": "model-00001-of-00003.safetensors",
20
+ "model.transformer.blocks.1.attn_norm.weight": "model-00001-of-00003.safetensors",
21
+ "model.transformer.blocks.1.attn_out.weight": "model-00001-of-00003.safetensors",
22
+ "model.transformer.blocks.1.attn_out.weight_scale": "model-00001-of-00003.safetensors",
23
+ "model.transformer.blocks.1.ff_norm.weight": "model-00001-of-00003.safetensors",
24
+ "model.transformer.blocks.1.ff_out.weight": "model-00001-of-00003.safetensors",
25
+ "model.transformer.blocks.1.ff_out.weight_scale": "model-00001-of-00003.safetensors",
26
+ "model.transformer.blocks.1.ff_proj.weight": "model-00001-of-00003.safetensors",
27
+ "model.transformer.blocks.1.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
28
+ "model.transformer.blocks.10.att_proj.bias": "model-00001-of-00003.safetensors",
29
+ "model.transformer.blocks.10.att_proj.weight": "model-00001-of-00003.safetensors",
30
+ "model.transformer.blocks.10.att_proj.weight_scale": "model-00001-of-00003.safetensors",
31
+ "model.transformer.blocks.10.attn_norm.weight": "model-00001-of-00003.safetensors",
32
+ "model.transformer.blocks.10.attn_out.weight": "model-00001-of-00003.safetensors",
33
+ "model.transformer.blocks.10.attn_out.weight_scale": "model-00001-of-00003.safetensors",
34
+ "model.transformer.blocks.10.ff_norm.weight": "model-00001-of-00003.safetensors",
35
+ "model.transformer.blocks.10.ff_out.weight": "model-00001-of-00003.safetensors",
36
+ "model.transformer.blocks.10.ff_out.weight_scale": "model-00001-of-00003.safetensors",
37
+ "model.transformer.blocks.10.ff_proj.weight": "model-00001-of-00003.safetensors",
38
+ "model.transformer.blocks.10.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
39
+ "model.transformer.blocks.11.att_proj.bias": "model-00001-of-00003.safetensors",
40
+ "model.transformer.blocks.11.att_proj.weight": "model-00001-of-00003.safetensors",
41
+ "model.transformer.blocks.11.att_proj.weight_scale": "model-00001-of-00003.safetensors",
42
+ "model.transformer.blocks.11.attn_norm.weight": "model-00001-of-00003.safetensors",
43
+ "model.transformer.blocks.11.attn_out.weight": "model-00001-of-00003.safetensors",
44
+ "model.transformer.blocks.11.attn_out.weight_scale": "model-00001-of-00003.safetensors",
45
+ "model.transformer.blocks.11.ff_norm.weight": "model-00001-of-00003.safetensors",
46
+ "model.transformer.blocks.11.ff_out.weight": "model-00001-of-00003.safetensors",
47
+ "model.transformer.blocks.11.ff_out.weight_scale": "model-00001-of-00003.safetensors",
48
+ "model.transformer.blocks.11.ff_proj.weight": "model-00001-of-00003.safetensors",
49
+ "model.transformer.blocks.11.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
50
+ "model.transformer.blocks.12.att_proj.bias": "model-00002-of-00003.safetensors",
51
+ "model.transformer.blocks.12.att_proj.weight": "model-00002-of-00003.safetensors",
52
+ "model.transformer.blocks.12.att_proj.weight_scale": "model-00002-of-00003.safetensors",
53
+ "model.transformer.blocks.12.attn_norm.weight": "model-00002-of-00003.safetensors",
54
+ "model.transformer.blocks.12.attn_out.weight": "model-00001-of-00003.safetensors",
55
+ "model.transformer.blocks.12.attn_out.weight_scale": "model-00001-of-00003.safetensors",
56
+ "model.transformer.blocks.12.ff_norm.weight": "model-00002-of-00003.safetensors",
57
+ "model.transformer.blocks.12.ff_out.weight": "model-00002-of-00003.safetensors",
58
+ "model.transformer.blocks.12.ff_out.weight_scale": "model-00002-of-00003.safetensors",
59
+ "model.transformer.blocks.12.ff_proj.weight": "model-00002-of-00003.safetensors",
60
+ "model.transformer.blocks.12.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
61
+ "model.transformer.blocks.13.att_proj.bias": "model-00002-of-00003.safetensors",
62
+ "model.transformer.blocks.13.att_proj.weight": "model-00002-of-00003.safetensors",
63
+ "model.transformer.blocks.13.att_proj.weight_scale": "model-00002-of-00003.safetensors",
64
+ "model.transformer.blocks.13.attn_norm.weight": "model-00002-of-00003.safetensors",
65
+ "model.transformer.blocks.13.attn_out.weight": "model-00002-of-00003.safetensors",
66
+ "model.transformer.blocks.13.attn_out.weight_scale": "model-00002-of-00003.safetensors",
67
+ "model.transformer.blocks.13.ff_norm.weight": "model-00002-of-00003.safetensors",
68
+ "model.transformer.blocks.13.ff_out.weight": "model-00002-of-00003.safetensors",
69
+ "model.transformer.blocks.13.ff_out.weight_scale": "model-00002-of-00003.safetensors",
70
+ "model.transformer.blocks.13.ff_proj.weight": "model-00002-of-00003.safetensors",
71
+ "model.transformer.blocks.13.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
72
+ "model.transformer.blocks.14.att_proj.bias": "model-00002-of-00003.safetensors",
73
+ "model.transformer.blocks.14.att_proj.weight": "model-00002-of-00003.safetensors",
74
+ "model.transformer.blocks.14.att_proj.weight_scale": "model-00002-of-00003.safetensors",
75
+ "model.transformer.blocks.14.attn_norm.weight": "model-00002-of-00003.safetensors",
76
+ "model.transformer.blocks.14.attn_out.weight": "model-00002-of-00003.safetensors",
77
+ "model.transformer.blocks.14.attn_out.weight_scale": "model-00002-of-00003.safetensors",
78
+ "model.transformer.blocks.14.ff_norm.weight": "model-00002-of-00003.safetensors",
79
+ "model.transformer.blocks.14.ff_out.weight": "model-00002-of-00003.safetensors",
80
+ "model.transformer.blocks.14.ff_out.weight_scale": "model-00002-of-00003.safetensors",
81
+ "model.transformer.blocks.14.ff_proj.weight": "model-00002-of-00003.safetensors",
82
+ "model.transformer.blocks.14.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
83
+ "model.transformer.blocks.15.att_proj.bias": "model-00002-of-00003.safetensors",
84
+ "model.transformer.blocks.15.att_proj.weight": "model-00002-of-00003.safetensors",
85
+ "model.transformer.blocks.15.att_proj.weight_scale": "model-00002-of-00003.safetensors",
86
+ "model.transformer.blocks.15.attn_norm.weight": "model-00002-of-00003.safetensors",
87
+ "model.transformer.blocks.15.attn_out.weight": "model-00002-of-00003.safetensors",
88
+ "model.transformer.blocks.15.attn_out.weight_scale": "model-00002-of-00003.safetensors",
89
+ "model.transformer.blocks.15.ff_norm.weight": "model-00002-of-00003.safetensors",
90
+ "model.transformer.blocks.15.ff_out.weight": "model-00002-of-00003.safetensors",
91
+ "model.transformer.blocks.15.ff_out.weight_scale": "model-00002-of-00003.safetensors",
92
+ "model.transformer.blocks.15.ff_proj.weight": "model-00002-of-00003.safetensors",
93
+ "model.transformer.blocks.15.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
94
+ "model.transformer.blocks.16.att_proj.bias": "model-00002-of-00003.safetensors",
95
+ "model.transformer.blocks.16.att_proj.weight": "model-00002-of-00003.safetensors",
96
+ "model.transformer.blocks.16.att_proj.weight_scale": "model-00002-of-00003.safetensors",
97
+ "model.transformer.blocks.16.attn_norm.weight": "model-00002-of-00003.safetensors",
98
+ "model.transformer.blocks.16.attn_out.weight": "model-00002-of-00003.safetensors",
99
+ "model.transformer.blocks.16.attn_out.weight_scale": "model-00002-of-00003.safetensors",
100
+ "model.transformer.blocks.16.ff_norm.weight": "model-00002-of-00003.safetensors",
101
+ "model.transformer.blocks.16.ff_out.weight": "model-00002-of-00003.safetensors",
102
+ "model.transformer.blocks.16.ff_out.weight_scale": "model-00002-of-00003.safetensors",
103
+ "model.transformer.blocks.16.ff_proj.weight": "model-00002-of-00003.safetensors",
104
+ "model.transformer.blocks.16.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
105
+ "model.transformer.blocks.17.att_proj.bias": "model-00002-of-00003.safetensors",
106
+ "model.transformer.blocks.17.att_proj.weight": "model-00002-of-00003.safetensors",
107
+ "model.transformer.blocks.17.att_proj.weight_scale": "model-00002-of-00003.safetensors",
108
+ "model.transformer.blocks.17.attn_norm.weight": "model-00002-of-00003.safetensors",
109
+ "model.transformer.blocks.17.attn_out.weight": "model-00002-of-00003.safetensors",
110
+ "model.transformer.blocks.17.attn_out.weight_scale": "model-00002-of-00003.safetensors",
111
+ "model.transformer.blocks.17.ff_norm.weight": "model-00002-of-00003.safetensors",
112
+ "model.transformer.blocks.17.ff_out.weight": "model-00002-of-00003.safetensors",
113
+ "model.transformer.blocks.17.ff_out.weight_scale": "model-00002-of-00003.safetensors",
114
+ "model.transformer.blocks.17.ff_proj.weight": "model-00002-of-00003.safetensors",
115
+ "model.transformer.blocks.17.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
116
+ "model.transformer.blocks.18.att_proj.bias": "model-00002-of-00003.safetensors",
117
+ "model.transformer.blocks.18.att_proj.weight": "model-00002-of-00003.safetensors",
118
+ "model.transformer.blocks.18.att_proj.weight_scale": "model-00002-of-00003.safetensors",
119
+ "model.transformer.blocks.18.attn_norm.weight": "model-00002-of-00003.safetensors",
120
+ "model.transformer.blocks.18.attn_out.weight": "model-00002-of-00003.safetensors",
121
+ "model.transformer.blocks.18.attn_out.weight_scale": "model-00002-of-00003.safetensors",
122
+ "model.transformer.blocks.18.ff_norm.weight": "model-00002-of-00003.safetensors",
123
+ "model.transformer.blocks.18.ff_out.weight": "model-00002-of-00003.safetensors",
124
+ "model.transformer.blocks.18.ff_out.weight_scale": "model-00002-of-00003.safetensors",
125
+ "model.transformer.blocks.18.ff_proj.weight": "model-00002-of-00003.safetensors",
126
+ "model.transformer.blocks.18.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
127
+ "model.transformer.blocks.19.att_proj.bias": "model-00002-of-00003.safetensors",
128
+ "model.transformer.blocks.19.att_proj.weight": "model-00002-of-00003.safetensors",
129
+ "model.transformer.blocks.19.att_proj.weight_scale": "model-00002-of-00003.safetensors",
130
+ "model.transformer.blocks.19.attn_norm.weight": "model-00002-of-00003.safetensors",
131
+ "model.transformer.blocks.19.attn_out.weight": "model-00002-of-00003.safetensors",
132
+ "model.transformer.blocks.19.attn_out.weight_scale": "model-00002-of-00003.safetensors",
133
+ "model.transformer.blocks.19.ff_norm.weight": "model-00002-of-00003.safetensors",
134
+ "model.transformer.blocks.19.ff_out.weight": "model-00002-of-00003.safetensors",
135
+ "model.transformer.blocks.19.ff_out.weight_scale": "model-00002-of-00003.safetensors",
136
+ "model.transformer.blocks.19.ff_proj.weight": "model-00002-of-00003.safetensors",
137
+ "model.transformer.blocks.19.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
138
+ "model.transformer.blocks.2.att_proj.bias": "model-00001-of-00003.safetensors",
139
+ "model.transformer.blocks.2.att_proj.weight": "model-00001-of-00003.safetensors",
140
+ "model.transformer.blocks.2.att_proj.weight_scale": "model-00001-of-00003.safetensors",
141
+ "model.transformer.blocks.2.attn_norm.weight": "model-00001-of-00003.safetensors",
142
+ "model.transformer.blocks.2.attn_out.weight": "model-00001-of-00003.safetensors",
143
+ "model.transformer.blocks.2.attn_out.weight_scale": "model-00001-of-00003.safetensors",
144
+ "model.transformer.blocks.2.ff_norm.weight": "model-00001-of-00003.safetensors",
145
+ "model.transformer.blocks.2.ff_out.weight": "model-00001-of-00003.safetensors",
146
+ "model.transformer.blocks.2.ff_out.weight_scale": "model-00001-of-00003.safetensors",
147
+ "model.transformer.blocks.2.ff_proj.weight": "model-00001-of-00003.safetensors",
148
+ "model.transformer.blocks.2.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
149
+ "model.transformer.blocks.20.att_proj.bias": "model-00002-of-00003.safetensors",
150
+ "model.transformer.blocks.20.att_proj.weight": "model-00002-of-00003.safetensors",
151
+ "model.transformer.blocks.20.att_proj.weight_scale": "model-00002-of-00003.safetensors",
152
+ "model.transformer.blocks.20.attn_norm.weight": "model-00002-of-00003.safetensors",
153
+ "model.transformer.blocks.20.attn_out.weight": "model-00002-of-00003.safetensors",
154
+ "model.transformer.blocks.20.attn_out.weight_scale": "model-00002-of-00003.safetensors",
155
+ "model.transformer.blocks.20.ff_norm.weight": "model-00002-of-00003.safetensors",
156
+ "model.transformer.blocks.20.ff_out.weight": "model-00002-of-00003.safetensors",
157
+ "model.transformer.blocks.20.ff_out.weight_scale": "model-00002-of-00003.safetensors",
158
+ "model.transformer.blocks.20.ff_proj.weight": "model-00002-of-00003.safetensors",
159
+ "model.transformer.blocks.20.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
160
+ "model.transformer.blocks.21.att_proj.bias": "model-00002-of-00003.safetensors",
161
+ "model.transformer.blocks.21.att_proj.weight": "model-00002-of-00003.safetensors",
162
+ "model.transformer.blocks.21.att_proj.weight_scale": "model-00002-of-00003.safetensors",
163
+ "model.transformer.blocks.21.attn_norm.weight": "model-00002-of-00003.safetensors",
164
+ "model.transformer.blocks.21.attn_out.weight": "model-00002-of-00003.safetensors",
165
+ "model.transformer.blocks.21.attn_out.weight_scale": "model-00002-of-00003.safetensors",
166
+ "model.transformer.blocks.21.ff_norm.weight": "model-00002-of-00003.safetensors",
167
+ "model.transformer.blocks.21.ff_out.weight": "model-00002-of-00003.safetensors",
168
+ "model.transformer.blocks.21.ff_out.weight_scale": "model-00002-of-00003.safetensors",
169
+ "model.transformer.blocks.21.ff_proj.weight": "model-00002-of-00003.safetensors",
170
+ "model.transformer.blocks.21.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
171
+ "model.transformer.blocks.22.att_proj.bias": "model-00002-of-00003.safetensors",
172
+ "model.transformer.blocks.22.att_proj.weight": "model-00002-of-00003.safetensors",
173
+ "model.transformer.blocks.22.att_proj.weight_scale": "model-00002-of-00003.safetensors",
174
+ "model.transformer.blocks.22.attn_norm.weight": "model-00002-of-00003.safetensors",
175
+ "model.transformer.blocks.22.attn_out.weight": "model-00002-of-00003.safetensors",
176
+ "model.transformer.blocks.22.attn_out.weight_scale": "model-00002-of-00003.safetensors",
177
+ "model.transformer.blocks.22.ff_norm.weight": "model-00002-of-00003.safetensors",
178
+ "model.transformer.blocks.22.ff_out.weight": "model-00002-of-00003.safetensors",
179
+ "model.transformer.blocks.22.ff_out.weight_scale": "model-00002-of-00003.safetensors",
180
+ "model.transformer.blocks.22.ff_proj.weight": "model-00002-of-00003.safetensors",
181
+ "model.transformer.blocks.22.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
182
+ "model.transformer.blocks.23.att_proj.bias": "model-00002-of-00003.safetensors",
183
+ "model.transformer.blocks.23.att_proj.weight": "model-00002-of-00003.safetensors",
184
+ "model.transformer.blocks.23.att_proj.weight_scale": "model-00002-of-00003.safetensors",
185
+ "model.transformer.blocks.23.attn_norm.weight": "model-00002-of-00003.safetensors",
186
+ "model.transformer.blocks.23.attn_out.weight": "model-00002-of-00003.safetensors",
187
+ "model.transformer.blocks.23.attn_out.weight_scale": "model-00002-of-00003.safetensors",
188
+ "model.transformer.blocks.23.ff_norm.weight": "model-00002-of-00003.safetensors",
189
+ "model.transformer.blocks.23.ff_out.weight": "model-00002-of-00003.safetensors",
190
+ "model.transformer.blocks.23.ff_out.weight_scale": "model-00002-of-00003.safetensors",
191
+ "model.transformer.blocks.23.ff_proj.weight": "model-00002-of-00003.safetensors",
192
+ "model.transformer.blocks.23.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
193
+ "model.transformer.blocks.24.att_proj.bias": "model-00002-of-00003.safetensors",
194
+ "model.transformer.blocks.24.att_proj.weight": "model-00002-of-00003.safetensors",
195
+ "model.transformer.blocks.24.att_proj.weight_scale": "model-00002-of-00003.safetensors",
196
+ "model.transformer.blocks.24.attn_norm.weight": "model-00002-of-00003.safetensors",
197
+ "model.transformer.blocks.24.attn_out.weight": "model-00002-of-00003.safetensors",
198
+ "model.transformer.blocks.24.attn_out.weight_scale": "model-00002-of-00003.safetensors",
199
+ "model.transformer.blocks.24.ff_norm.weight": "model-00002-of-00003.safetensors",
200
+ "model.transformer.blocks.24.ff_out.weight": "model-00002-of-00003.safetensors",
201
+ "model.transformer.blocks.24.ff_out.weight_scale": "model-00002-of-00003.safetensors",
202
+ "model.transformer.blocks.24.ff_proj.weight": "model-00002-of-00003.safetensors",
203
+ "model.transformer.blocks.24.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
204
+ "model.transformer.blocks.25.att_proj.bias": "model-00002-of-00003.safetensors",
205
+ "model.transformer.blocks.25.att_proj.weight": "model-00002-of-00003.safetensors",
206
+ "model.transformer.blocks.25.att_proj.weight_scale": "model-00002-of-00003.safetensors",
207
+ "model.transformer.blocks.25.attn_norm.weight": "model-00002-of-00003.safetensors",
208
+ "model.transformer.blocks.25.attn_out.weight": "model-00002-of-00003.safetensors",
209
+ "model.transformer.blocks.25.attn_out.weight_scale": "model-00002-of-00003.safetensors",
210
+ "model.transformer.blocks.25.ff_norm.weight": "model-00002-of-00003.safetensors",
211
+ "model.transformer.blocks.25.ff_out.weight": "model-00002-of-00003.safetensors",
212
+ "model.transformer.blocks.25.ff_out.weight_scale": "model-00002-of-00003.safetensors",
213
+ "model.transformer.blocks.25.ff_proj.weight": "model-00002-of-00003.safetensors",
214
+ "model.transformer.blocks.25.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
215
+ "model.transformer.blocks.26.att_proj.bias": "model-00002-of-00003.safetensors",
216
+ "model.transformer.blocks.26.att_proj.weight": "model-00002-of-00003.safetensors",
217
+ "model.transformer.blocks.26.att_proj.weight_scale": "model-00002-of-00003.safetensors",
218
+ "model.transformer.blocks.26.attn_norm.weight": "model-00002-of-00003.safetensors",
219
+ "model.transformer.blocks.26.attn_out.weight": "model-00002-of-00003.safetensors",
220
+ "model.transformer.blocks.26.attn_out.weight_scale": "model-00002-of-00003.safetensors",
221
+ "model.transformer.blocks.26.ff_norm.weight": "model-00002-of-00003.safetensors",
222
+ "model.transformer.blocks.26.ff_out.weight": "model-00002-of-00003.safetensors",
223
+ "model.transformer.blocks.26.ff_out.weight_scale": "model-00002-of-00003.safetensors",
224
+ "model.transformer.blocks.26.ff_proj.weight": "model-00002-of-00003.safetensors",
225
+ "model.transformer.blocks.26.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
226
+ "model.transformer.blocks.27.att_proj.bias": "model-00002-of-00003.safetensors",
227
+ "model.transformer.blocks.27.att_proj.weight": "model-00002-of-00003.safetensors",
228
+ "model.transformer.blocks.27.att_proj.weight_scale": "model-00002-of-00003.safetensors",
229
+ "model.transformer.blocks.27.attn_norm.weight": "model-00002-of-00003.safetensors",
230
+ "model.transformer.blocks.27.attn_out.weight": "model-00002-of-00003.safetensors",
231
+ "model.transformer.blocks.27.attn_out.weight_scale": "model-00002-of-00003.safetensors",
232
+ "model.transformer.blocks.27.ff_norm.weight": "model-00002-of-00003.safetensors",
233
+ "model.transformer.blocks.27.ff_out.weight": "model-00002-of-00003.safetensors",
234
+ "model.transformer.blocks.27.ff_out.weight_scale": "model-00002-of-00003.safetensors",
235
+ "model.transformer.blocks.27.ff_proj.weight": "model-00002-of-00003.safetensors",
236
+ "model.transformer.blocks.27.ff_proj.weight_scale": "model-00002-of-00003.safetensors",
237
+ "model.transformer.blocks.3.att_proj.bias": "model-00001-of-00003.safetensors",
238
+ "model.transformer.blocks.3.att_proj.weight": "model-00001-of-00003.safetensors",
239
+ "model.transformer.blocks.3.att_proj.weight_scale": "model-00001-of-00003.safetensors",
240
+ "model.transformer.blocks.3.attn_norm.weight": "model-00001-of-00003.safetensors",
241
+ "model.transformer.blocks.3.attn_out.weight": "model-00001-of-00003.safetensors",
242
+ "model.transformer.blocks.3.attn_out.weight_scale": "model-00001-of-00003.safetensors",
243
+ "model.transformer.blocks.3.ff_norm.weight": "model-00001-of-00003.safetensors",
244
+ "model.transformer.blocks.3.ff_out.weight": "model-00001-of-00003.safetensors",
245
+ "model.transformer.blocks.3.ff_out.weight_scale": "model-00001-of-00003.safetensors",
246
+ "model.transformer.blocks.3.ff_proj.weight": "model-00001-of-00003.safetensors",
247
+ "model.transformer.blocks.3.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
248
+ "model.transformer.blocks.4.att_proj.bias": "model-00001-of-00003.safetensors",
249
+ "model.transformer.blocks.4.att_proj.weight": "model-00001-of-00003.safetensors",
250
+ "model.transformer.blocks.4.att_proj.weight_scale": "model-00001-of-00003.safetensors",
251
+ "model.transformer.blocks.4.attn_norm.weight": "model-00001-of-00003.safetensors",
252
+ "model.transformer.blocks.4.attn_out.weight": "model-00001-of-00003.safetensors",
253
+ "model.transformer.blocks.4.attn_out.weight_scale": "model-00001-of-00003.safetensors",
254
+ "model.transformer.blocks.4.ff_norm.weight": "model-00001-of-00003.safetensors",
255
+ "model.transformer.blocks.4.ff_out.weight": "model-00001-of-00003.safetensors",
256
+ "model.transformer.blocks.4.ff_out.weight_scale": "model-00001-of-00003.safetensors",
257
+ "model.transformer.blocks.4.ff_proj.weight": "model-00001-of-00003.safetensors",
258
+ "model.transformer.blocks.4.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
259
+ "model.transformer.blocks.5.att_proj.bias": "model-00001-of-00003.safetensors",
260
+ "model.transformer.blocks.5.att_proj.weight": "model-00001-of-00003.safetensors",
261
+ "model.transformer.blocks.5.att_proj.weight_scale": "model-00001-of-00003.safetensors",
262
+ "model.transformer.blocks.5.attn_norm.weight": "model-00001-of-00003.safetensors",
263
+ "model.transformer.blocks.5.attn_out.weight": "model-00001-of-00003.safetensors",
264
+ "model.transformer.blocks.5.attn_out.weight_scale": "model-00001-of-00003.safetensors",
265
+ "model.transformer.blocks.5.ff_norm.weight": "model-00001-of-00003.safetensors",
266
+ "model.transformer.blocks.5.ff_out.weight": "model-00001-of-00003.safetensors",
267
+ "model.transformer.blocks.5.ff_out.weight_scale": "model-00001-of-00003.safetensors",
268
+ "model.transformer.blocks.5.ff_proj.weight": "model-00001-of-00003.safetensors",
269
+ "model.transformer.blocks.5.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
270
+ "model.transformer.blocks.6.att_proj.bias": "model-00001-of-00003.safetensors",
271
+ "model.transformer.blocks.6.att_proj.weight": "model-00001-of-00003.safetensors",
272
+ "model.transformer.blocks.6.att_proj.weight_scale": "model-00001-of-00003.safetensors",
273
+ "model.transformer.blocks.6.attn_norm.weight": "model-00001-of-00003.safetensors",
274
+ "model.transformer.blocks.6.attn_out.weight": "model-00001-of-00003.safetensors",
275
+ "model.transformer.blocks.6.attn_out.weight_scale": "model-00001-of-00003.safetensors",
276
+ "model.transformer.blocks.6.ff_norm.weight": "model-00001-of-00003.safetensors",
277
+ "model.transformer.blocks.6.ff_out.weight": "model-00001-of-00003.safetensors",
278
+ "model.transformer.blocks.6.ff_out.weight_scale": "model-00001-of-00003.safetensors",
279
+ "model.transformer.blocks.6.ff_proj.weight": "model-00001-of-00003.safetensors",
280
+ "model.transformer.blocks.6.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
281
+ "model.transformer.blocks.7.att_proj.bias": "model-00001-of-00003.safetensors",
282
+ "model.transformer.blocks.7.att_proj.weight": "model-00001-of-00003.safetensors",
283
+ "model.transformer.blocks.7.att_proj.weight_scale": "model-00001-of-00003.safetensors",
284
+ "model.transformer.blocks.7.attn_norm.weight": "model-00001-of-00003.safetensors",
285
+ "model.transformer.blocks.7.attn_out.weight": "model-00001-of-00003.safetensors",
286
+ "model.transformer.blocks.7.attn_out.weight_scale": "model-00001-of-00003.safetensors",
287
+ "model.transformer.blocks.7.ff_norm.weight": "model-00001-of-00003.safetensors",
288
+ "model.transformer.blocks.7.ff_out.weight": "model-00001-of-00003.safetensors",
289
+ "model.transformer.blocks.7.ff_out.weight_scale": "model-00001-of-00003.safetensors",
290
+ "model.transformer.blocks.7.ff_proj.weight": "model-00001-of-00003.safetensors",
291
+ "model.transformer.blocks.7.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
292
+ "model.transformer.blocks.8.att_proj.bias": "model-00001-of-00003.safetensors",
293
+ "model.transformer.blocks.8.att_proj.weight": "model-00001-of-00003.safetensors",
294
+ "model.transformer.blocks.8.att_proj.weight_scale": "model-00001-of-00003.safetensors",
295
+ "model.transformer.blocks.8.attn_norm.weight": "model-00001-of-00003.safetensors",
296
+ "model.transformer.blocks.8.attn_out.weight": "model-00001-of-00003.safetensors",
297
+ "model.transformer.blocks.8.attn_out.weight_scale": "model-00001-of-00003.safetensors",
298
+ "model.transformer.blocks.8.ff_norm.weight": "model-00001-of-00003.safetensors",
299
+ "model.transformer.blocks.8.ff_out.weight": "model-00001-of-00003.safetensors",
300
+ "model.transformer.blocks.8.ff_out.weight_scale": "model-00001-of-00003.safetensors",
301
+ "model.transformer.blocks.8.ff_proj.weight": "model-00001-of-00003.safetensors",
302
+ "model.transformer.blocks.8.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
303
+ "model.transformer.blocks.9.att_proj.bias": "model-00001-of-00003.safetensors",
304
+ "model.transformer.blocks.9.att_proj.weight": "model-00001-of-00003.safetensors",
305
+ "model.transformer.blocks.9.att_proj.weight_scale": "model-00001-of-00003.safetensors",
306
+ "model.transformer.blocks.9.attn_norm.weight": "model-00001-of-00003.safetensors",
307
+ "model.transformer.blocks.9.attn_out.weight": "model-00001-of-00003.safetensors",
308
+ "model.transformer.blocks.9.attn_out.weight_scale": "model-00001-of-00003.safetensors",
309
+ "model.transformer.blocks.9.ff_norm.weight": "model-00001-of-00003.safetensors",
310
+ "model.transformer.blocks.9.ff_out.weight": "model-00001-of-00003.safetensors",
311
+ "model.transformer.blocks.9.ff_out.weight_scale": "model-00001-of-00003.safetensors",
312
+ "model.transformer.blocks.9.ff_proj.weight": "model-00001-of-00003.safetensors",
313
+ "model.transformer.blocks.9.ff_proj.weight_scale": "model-00001-of-00003.safetensors",
314
+ "model.transformer.ff_out.weight": "model-00002-of-00003.safetensors",
315
+ "model.transformer.ff_out.weight_scale": "model-00002-of-00003.safetensors",
316
+ "model.transformer.ln_f.weight": "model-00001-of-00003.safetensors",
317
+ "model.transformer.wte.embedding": "model-00001-of-00003.safetensors",
318
+ "model.transformer.wte.new_embedding": "model-00001-of-00003.safetensors",
319
+ "model.vision_backbone.image_pooling_2d.wk.bias": "model-00003-of-00003.safetensors",
320
+ "model.vision_backbone.image_pooling_2d.wk.weight": "model-00003-of-00003.safetensors",
321
+ "model.vision_backbone.image_pooling_2d.wo.bias": "model-00003-of-00003.safetensors",
322
+ "model.vision_backbone.image_pooling_2d.wo.weight": "model-00003-of-00003.safetensors",
323
+ "model.vision_backbone.image_pooling_2d.wq.bias": "model-00003-of-00003.safetensors",
324
+ "model.vision_backbone.image_pooling_2d.wq.weight": "model-00003-of-00003.safetensors",
325
+ "model.vision_backbone.image_pooling_2d.wv.bias": "model-00003-of-00003.safetensors",
326
+ "model.vision_backbone.image_pooling_2d.wv.weight": "model-00003-of-00003.safetensors",
327
+ "model.vision_backbone.image_projector.w1.weight": "model-00003-of-00003.safetensors",
328
+ "model.vision_backbone.image_projector.w2.weight": "model-00003-of-00003.safetensors",
329
+ "model.vision_backbone.image_projector.w3.weight": "model-00003-of-00003.safetensors",
330
+ "model.vision_backbone.image_vit.class_embedding": "model-00002-of-00003.safetensors",
331
+ "model.vision_backbone.image_vit.patch_embedding.weight": "model-00002-of-00003.safetensors",
332
+ "model.vision_backbone.image_vit.positional_embedding": "model-00002-of-00003.safetensors",
333
+ "model.vision_backbone.image_vit.pre_ln.bias": "model-00002-of-00003.safetensors",
334
+ "model.vision_backbone.image_vit.pre_ln.weight": "model-00002-of-00003.safetensors",
335
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk.bias": "model-00002-of-00003.safetensors",
336
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wk.weight": "model-00002-of-00003.safetensors",
337
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo.bias": "model-00002-of-00003.safetensors",
338
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wo.weight": "model-00002-of-00003.safetensors",
339
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq.bias": "model-00002-of-00003.safetensors",
340
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wq.weight": "model-00002-of-00003.safetensors",
341
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv.bias": "model-00002-of-00003.safetensors",
342
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention.wv.weight": "model-00002-of-00003.safetensors",
343
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm.bias": "model-00002-of-00003.safetensors",
344
+ "model.vision_backbone.image_vit.transformer.resblocks.0.attention_norm.weight": "model-00002-of-00003.safetensors",
345
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
346
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
347
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
348
+ "model.vision_backbone.image_vit.transformer.resblocks.0.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
349
+ "model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm.bias": "model-00002-of-00003.safetensors",
350
+ "model.vision_backbone.image_vit.transformer.resblocks.0.ffn_norm.weight": "model-00002-of-00003.safetensors",
351
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk.bias": "model-00002-of-00003.safetensors",
352
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wk.weight": "model-00002-of-00003.safetensors",
353
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo.bias": "model-00002-of-00003.safetensors",
354
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wo.weight": "model-00002-of-00003.safetensors",
355
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq.bias": "model-00002-of-00003.safetensors",
356
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wq.weight": "model-00002-of-00003.safetensors",
357
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv.bias": "model-00002-of-00003.safetensors",
358
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention.wv.weight": "model-00002-of-00003.safetensors",
359
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm.bias": "model-00002-of-00003.safetensors",
360
+ "model.vision_backbone.image_vit.transformer.resblocks.1.attention_norm.weight": "model-00002-of-00003.safetensors",
361
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
362
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
363
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
364
+ "model.vision_backbone.image_vit.transformer.resblocks.1.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
365
+ "model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm.bias": "model-00002-of-00003.safetensors",
366
+ "model.vision_backbone.image_vit.transformer.resblocks.1.ffn_norm.weight": "model-00002-of-00003.safetensors",
367
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk.bias": "model-00002-of-00003.safetensors",
368
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wk.weight": "model-00002-of-00003.safetensors",
369
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo.bias": "model-00002-of-00003.safetensors",
370
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wo.weight": "model-00002-of-00003.safetensors",
371
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq.bias": "model-00002-of-00003.safetensors",
372
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wq.weight": "model-00002-of-00003.safetensors",
373
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv.bias": "model-00002-of-00003.safetensors",
374
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention.wv.weight": "model-00002-of-00003.safetensors",
375
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm.bias": "model-00002-of-00003.safetensors",
376
+ "model.vision_backbone.image_vit.transformer.resblocks.10.attention_norm.weight": "model-00002-of-00003.safetensors",
377
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
378
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
379
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
380
+ "model.vision_backbone.image_vit.transformer.resblocks.10.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
381
+ "model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm.bias": "model-00002-of-00003.safetensors",
382
+ "model.vision_backbone.image_vit.transformer.resblocks.10.ffn_norm.weight": "model-00002-of-00003.safetensors",
383
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk.bias": "model-00002-of-00003.safetensors",
384
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wk.weight": "model-00002-of-00003.safetensors",
385
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo.bias": "model-00002-of-00003.safetensors",
386
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wo.weight": "model-00002-of-00003.safetensors",
387
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq.bias": "model-00002-of-00003.safetensors",
388
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wq.weight": "model-00002-of-00003.safetensors",
389
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv.bias": "model-00002-of-00003.safetensors",
390
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention.wv.weight": "model-00002-of-00003.safetensors",
391
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm.bias": "model-00002-of-00003.safetensors",
392
+ "model.vision_backbone.image_vit.transformer.resblocks.11.attention_norm.weight": "model-00002-of-00003.safetensors",
393
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
394
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
395
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
396
+ "model.vision_backbone.image_vit.transformer.resblocks.11.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
397
+ "model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm.bias": "model-00002-of-00003.safetensors",
398
+ "model.vision_backbone.image_vit.transformer.resblocks.11.ffn_norm.weight": "model-00002-of-00003.safetensors",
399
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk.bias": "model-00002-of-00003.safetensors",
400
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wk.weight": "model-00002-of-00003.safetensors",
401
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo.bias": "model-00002-of-00003.safetensors",
402
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wo.weight": "model-00002-of-00003.safetensors",
403
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq.bias": "model-00002-of-00003.safetensors",
404
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wq.weight": "model-00002-of-00003.safetensors",
405
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv.bias": "model-00002-of-00003.safetensors",
406
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention.wv.weight": "model-00002-of-00003.safetensors",
407
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm.bias": "model-00002-of-00003.safetensors",
408
+ "model.vision_backbone.image_vit.transformer.resblocks.12.attention_norm.weight": "model-00002-of-00003.safetensors",
409
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
410
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
411
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
412
+ "model.vision_backbone.image_vit.transformer.resblocks.12.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
413
+ "model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm.bias": "model-00002-of-00003.safetensors",
414
+ "model.vision_backbone.image_vit.transformer.resblocks.12.ffn_norm.weight": "model-00002-of-00003.safetensors",
415
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk.bias": "model-00002-of-00003.safetensors",
416
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wk.weight": "model-00002-of-00003.safetensors",
417
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo.bias": "model-00002-of-00003.safetensors",
418
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wo.weight": "model-00002-of-00003.safetensors",
419
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq.bias": "model-00002-of-00003.safetensors",
420
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wq.weight": "model-00002-of-00003.safetensors",
421
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv.bias": "model-00002-of-00003.safetensors",
422
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention.wv.weight": "model-00002-of-00003.safetensors",
423
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm.bias": "model-00002-of-00003.safetensors",
424
+ "model.vision_backbone.image_vit.transformer.resblocks.13.attention_norm.weight": "model-00002-of-00003.safetensors",
425
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
426
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
427
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
428
+ "model.vision_backbone.image_vit.transformer.resblocks.13.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
429
+ "model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm.bias": "model-00002-of-00003.safetensors",
430
+ "model.vision_backbone.image_vit.transformer.resblocks.13.ffn_norm.weight": "model-00002-of-00003.safetensors",
431
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk.bias": "model-00002-of-00003.safetensors",
432
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wk.weight": "model-00002-of-00003.safetensors",
433
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo.bias": "model-00002-of-00003.safetensors",
434
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wo.weight": "model-00002-of-00003.safetensors",
435
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq.bias": "model-00002-of-00003.safetensors",
436
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wq.weight": "model-00002-of-00003.safetensors",
437
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv.bias": "model-00002-of-00003.safetensors",
438
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention.wv.weight": "model-00002-of-00003.safetensors",
439
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm.bias": "model-00003-of-00003.safetensors",
440
+ "model.vision_backbone.image_vit.transformer.resblocks.14.attention_norm.weight": "model-00003-of-00003.safetensors",
441
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
442
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
443
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
444
+ "model.vision_backbone.image_vit.transformer.resblocks.14.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
445
+ "model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm.bias": "model-00003-of-00003.safetensors",
446
+ "model.vision_backbone.image_vit.transformer.resblocks.14.ffn_norm.weight": "model-00003-of-00003.safetensors",
447
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk.bias": "model-00003-of-00003.safetensors",
448
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wk.weight": "model-00003-of-00003.safetensors",
449
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo.bias": "model-00003-of-00003.safetensors",
450
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wo.weight": "model-00003-of-00003.safetensors",
451
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq.bias": "model-00003-of-00003.safetensors",
452
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wq.weight": "model-00003-of-00003.safetensors",
453
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv.bias": "model-00003-of-00003.safetensors",
454
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention.wv.weight": "model-00003-of-00003.safetensors",
455
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm.bias": "model-00003-of-00003.safetensors",
456
+ "model.vision_backbone.image_vit.transformer.resblocks.15.attention_norm.weight": "model-00003-of-00003.safetensors",
457
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
458
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
459
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
460
+ "model.vision_backbone.image_vit.transformer.resblocks.15.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
461
+ "model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm.bias": "model-00003-of-00003.safetensors",
462
+ "model.vision_backbone.image_vit.transformer.resblocks.15.ffn_norm.weight": "model-00003-of-00003.safetensors",
463
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk.bias": "model-00003-of-00003.safetensors",
464
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wk.weight": "model-00003-of-00003.safetensors",
465
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo.bias": "model-00003-of-00003.safetensors",
466
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wo.weight": "model-00003-of-00003.safetensors",
467
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq.bias": "model-00003-of-00003.safetensors",
468
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wq.weight": "model-00003-of-00003.safetensors",
469
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv.bias": "model-00003-of-00003.safetensors",
470
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention.wv.weight": "model-00003-of-00003.safetensors",
471
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm.bias": "model-00003-of-00003.safetensors",
472
+ "model.vision_backbone.image_vit.transformer.resblocks.16.attention_norm.weight": "model-00003-of-00003.safetensors",
473
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
474
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
475
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
476
+ "model.vision_backbone.image_vit.transformer.resblocks.16.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
477
+ "model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm.bias": "model-00003-of-00003.safetensors",
478
+ "model.vision_backbone.image_vit.transformer.resblocks.16.ffn_norm.weight": "model-00003-of-00003.safetensors",
479
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk.bias": "model-00003-of-00003.safetensors",
480
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wk.weight": "model-00003-of-00003.safetensors",
481
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo.bias": "model-00003-of-00003.safetensors",
482
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wo.weight": "model-00003-of-00003.safetensors",
483
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq.bias": "model-00003-of-00003.safetensors",
484
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wq.weight": "model-00003-of-00003.safetensors",
485
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv.bias": "model-00003-of-00003.safetensors",
486
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention.wv.weight": "model-00003-of-00003.safetensors",
487
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm.bias": "model-00003-of-00003.safetensors",
488
+ "model.vision_backbone.image_vit.transformer.resblocks.17.attention_norm.weight": "model-00003-of-00003.safetensors",
489
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
490
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
491
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
492
+ "model.vision_backbone.image_vit.transformer.resblocks.17.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
493
+ "model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm.bias": "model-00003-of-00003.safetensors",
494
+ "model.vision_backbone.image_vit.transformer.resblocks.17.ffn_norm.weight": "model-00003-of-00003.safetensors",
495
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk.bias": "model-00003-of-00003.safetensors",
496
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wk.weight": "model-00003-of-00003.safetensors",
497
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo.bias": "model-00003-of-00003.safetensors",
498
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wo.weight": "model-00003-of-00003.safetensors",
499
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq.bias": "model-00003-of-00003.safetensors",
500
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wq.weight": "model-00003-of-00003.safetensors",
501
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv.bias": "model-00003-of-00003.safetensors",
502
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention.wv.weight": "model-00003-of-00003.safetensors",
503
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm.bias": "model-00003-of-00003.safetensors",
504
+ "model.vision_backbone.image_vit.transformer.resblocks.18.attention_norm.weight": "model-00003-of-00003.safetensors",
505
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
506
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
507
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
508
+ "model.vision_backbone.image_vit.transformer.resblocks.18.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
509
+ "model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm.bias": "model-00003-of-00003.safetensors",
510
+ "model.vision_backbone.image_vit.transformer.resblocks.18.ffn_norm.weight": "model-00003-of-00003.safetensors",
511
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk.bias": "model-00003-of-00003.safetensors",
512
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wk.weight": "model-00003-of-00003.safetensors",
513
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo.bias": "model-00003-of-00003.safetensors",
514
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wo.weight": "model-00003-of-00003.safetensors",
515
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq.bias": "model-00003-of-00003.safetensors",
516
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wq.weight": "model-00003-of-00003.safetensors",
517
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv.bias": "model-00003-of-00003.safetensors",
518
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention.wv.weight": "model-00003-of-00003.safetensors",
519
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm.bias": "model-00003-of-00003.safetensors",
520
+ "model.vision_backbone.image_vit.transformer.resblocks.19.attention_norm.weight": "model-00003-of-00003.safetensors",
521
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
522
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
523
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
524
+ "model.vision_backbone.image_vit.transformer.resblocks.19.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
525
+ "model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm.bias": "model-00003-of-00003.safetensors",
526
+ "model.vision_backbone.image_vit.transformer.resblocks.19.ffn_norm.weight": "model-00003-of-00003.safetensors",
527
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk.bias": "model-00002-of-00003.safetensors",
528
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wk.weight": "model-00002-of-00003.safetensors",
529
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo.bias": "model-00002-of-00003.safetensors",
530
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wo.weight": "model-00002-of-00003.safetensors",
531
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq.bias": "model-00002-of-00003.safetensors",
532
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wq.weight": "model-00002-of-00003.safetensors",
533
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv.bias": "model-00002-of-00003.safetensors",
534
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention.wv.weight": "model-00002-of-00003.safetensors",
535
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm.bias": "model-00002-of-00003.safetensors",
536
+ "model.vision_backbone.image_vit.transformer.resblocks.2.attention_norm.weight": "model-00002-of-00003.safetensors",
537
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
538
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
539
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
540
+ "model.vision_backbone.image_vit.transformer.resblocks.2.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
541
+ "model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm.bias": "model-00002-of-00003.safetensors",
542
+ "model.vision_backbone.image_vit.transformer.resblocks.2.ffn_norm.weight": "model-00002-of-00003.safetensors",
543
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk.bias": "model-00003-of-00003.safetensors",
544
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wk.weight": "model-00003-of-00003.safetensors",
545
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo.bias": "model-00003-of-00003.safetensors",
546
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wo.weight": "model-00003-of-00003.safetensors",
547
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq.bias": "model-00003-of-00003.safetensors",
548
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wq.weight": "model-00003-of-00003.safetensors",
549
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv.bias": "model-00003-of-00003.safetensors",
550
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention.wv.weight": "model-00003-of-00003.safetensors",
551
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm.bias": "model-00003-of-00003.safetensors",
552
+ "model.vision_backbone.image_vit.transformer.resblocks.20.attention_norm.weight": "model-00003-of-00003.safetensors",
553
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
554
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
555
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
556
+ "model.vision_backbone.image_vit.transformer.resblocks.20.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
557
+ "model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm.bias": "model-00003-of-00003.safetensors",
558
+ "model.vision_backbone.image_vit.transformer.resblocks.20.ffn_norm.weight": "model-00003-of-00003.safetensors",
559
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk.bias": "model-00003-of-00003.safetensors",
560
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wk.weight": "model-00003-of-00003.safetensors",
561
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo.bias": "model-00003-of-00003.safetensors",
562
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wo.weight": "model-00003-of-00003.safetensors",
563
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq.bias": "model-00003-of-00003.safetensors",
564
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wq.weight": "model-00003-of-00003.safetensors",
565
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv.bias": "model-00003-of-00003.safetensors",
566
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention.wv.weight": "model-00003-of-00003.safetensors",
567
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm.bias": "model-00003-of-00003.safetensors",
568
+ "model.vision_backbone.image_vit.transformer.resblocks.21.attention_norm.weight": "model-00003-of-00003.safetensors",
569
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
570
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
571
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
572
+ "model.vision_backbone.image_vit.transformer.resblocks.21.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
573
+ "model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm.bias": "model-00003-of-00003.safetensors",
574
+ "model.vision_backbone.image_vit.transformer.resblocks.21.ffn_norm.weight": "model-00003-of-00003.safetensors",
575
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk.bias": "model-00003-of-00003.safetensors",
576
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wk.weight": "model-00003-of-00003.safetensors",
577
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo.bias": "model-00003-of-00003.safetensors",
578
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wo.weight": "model-00003-of-00003.safetensors",
579
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq.bias": "model-00003-of-00003.safetensors",
580
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wq.weight": "model-00003-of-00003.safetensors",
581
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv.bias": "model-00003-of-00003.safetensors",
582
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention.wv.weight": "model-00003-of-00003.safetensors",
583
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm.bias": "model-00003-of-00003.safetensors",
584
+ "model.vision_backbone.image_vit.transformer.resblocks.22.attention_norm.weight": "model-00003-of-00003.safetensors",
585
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1.bias": "model-00003-of-00003.safetensors",
586
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w1.weight": "model-00003-of-00003.safetensors",
587
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2.bias": "model-00003-of-00003.safetensors",
588
+ "model.vision_backbone.image_vit.transformer.resblocks.22.feed_forward.w2.weight": "model-00003-of-00003.safetensors",
589
+ "model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm.bias": "model-00003-of-00003.safetensors",
590
+ "model.vision_backbone.image_vit.transformer.resblocks.22.ffn_norm.weight": "model-00003-of-00003.safetensors",
591
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk.bias": "model-00002-of-00003.safetensors",
592
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wk.weight": "model-00002-of-00003.safetensors",
593
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo.bias": "model-00002-of-00003.safetensors",
594
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wo.weight": "model-00002-of-00003.safetensors",
595
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq.bias": "model-00002-of-00003.safetensors",
596
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wq.weight": "model-00002-of-00003.safetensors",
597
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv.bias": "model-00002-of-00003.safetensors",
598
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention.wv.weight": "model-00002-of-00003.safetensors",
599
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm.bias": "model-00002-of-00003.safetensors",
600
+ "model.vision_backbone.image_vit.transformer.resblocks.3.attention_norm.weight": "model-00002-of-00003.safetensors",
601
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
602
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
603
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
604
+ "model.vision_backbone.image_vit.transformer.resblocks.3.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
605
+ "model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm.bias": "model-00002-of-00003.safetensors",
606
+ "model.vision_backbone.image_vit.transformer.resblocks.3.ffn_norm.weight": "model-00002-of-00003.safetensors",
607
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk.bias": "model-00002-of-00003.safetensors",
608
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wk.weight": "model-00002-of-00003.safetensors",
609
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo.bias": "model-00002-of-00003.safetensors",
610
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wo.weight": "model-00002-of-00003.safetensors",
611
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq.bias": "model-00002-of-00003.safetensors",
612
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wq.weight": "model-00002-of-00003.safetensors",
613
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv.bias": "model-00002-of-00003.safetensors",
614
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention.wv.weight": "model-00002-of-00003.safetensors",
615
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm.bias": "model-00002-of-00003.safetensors",
616
+ "model.vision_backbone.image_vit.transformer.resblocks.4.attention_norm.weight": "model-00002-of-00003.safetensors",
617
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
618
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
619
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
620
+ "model.vision_backbone.image_vit.transformer.resblocks.4.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
621
+ "model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm.bias": "model-00002-of-00003.safetensors",
622
+ "model.vision_backbone.image_vit.transformer.resblocks.4.ffn_norm.weight": "model-00002-of-00003.safetensors",
623
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk.bias": "model-00002-of-00003.safetensors",
624
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wk.weight": "model-00002-of-00003.safetensors",
625
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo.bias": "model-00002-of-00003.safetensors",
626
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wo.weight": "model-00002-of-00003.safetensors",
627
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq.bias": "model-00002-of-00003.safetensors",
628
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wq.weight": "model-00002-of-00003.safetensors",
629
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv.bias": "model-00002-of-00003.safetensors",
630
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention.wv.weight": "model-00002-of-00003.safetensors",
631
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm.bias": "model-00002-of-00003.safetensors",
632
+ "model.vision_backbone.image_vit.transformer.resblocks.5.attention_norm.weight": "model-00002-of-00003.safetensors",
633
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
634
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
635
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
636
+ "model.vision_backbone.image_vit.transformer.resblocks.5.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
637
+ "model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm.bias": "model-00002-of-00003.safetensors",
638
+ "model.vision_backbone.image_vit.transformer.resblocks.5.ffn_norm.weight": "model-00002-of-00003.safetensors",
639
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk.bias": "model-00002-of-00003.safetensors",
640
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wk.weight": "model-00002-of-00003.safetensors",
641
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo.bias": "model-00002-of-00003.safetensors",
642
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wo.weight": "model-00002-of-00003.safetensors",
643
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq.bias": "model-00002-of-00003.safetensors",
644
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wq.weight": "model-00002-of-00003.safetensors",
645
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv.bias": "model-00002-of-00003.safetensors",
646
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention.wv.weight": "model-00002-of-00003.safetensors",
647
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm.bias": "model-00002-of-00003.safetensors",
648
+ "model.vision_backbone.image_vit.transformer.resblocks.6.attention_norm.weight": "model-00002-of-00003.safetensors",
649
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
650
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
651
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
652
+ "model.vision_backbone.image_vit.transformer.resblocks.6.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
653
+ "model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm.bias": "model-00002-of-00003.safetensors",
654
+ "model.vision_backbone.image_vit.transformer.resblocks.6.ffn_norm.weight": "model-00002-of-00003.safetensors",
655
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk.bias": "model-00002-of-00003.safetensors",
656
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wk.weight": "model-00002-of-00003.safetensors",
657
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo.bias": "model-00002-of-00003.safetensors",
658
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wo.weight": "model-00002-of-00003.safetensors",
659
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq.bias": "model-00002-of-00003.safetensors",
660
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wq.weight": "model-00002-of-00003.safetensors",
661
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv.bias": "model-00002-of-00003.safetensors",
662
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention.wv.weight": "model-00002-of-00003.safetensors",
663
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm.bias": "model-00002-of-00003.safetensors",
664
+ "model.vision_backbone.image_vit.transformer.resblocks.7.attention_norm.weight": "model-00002-of-00003.safetensors",
665
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
666
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
667
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
668
+ "model.vision_backbone.image_vit.transformer.resblocks.7.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
669
+ "model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm.bias": "model-00002-of-00003.safetensors",
670
+ "model.vision_backbone.image_vit.transformer.resblocks.7.ffn_norm.weight": "model-00002-of-00003.safetensors",
671
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk.bias": "model-00002-of-00003.safetensors",
672
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wk.weight": "model-00002-of-00003.safetensors",
673
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo.bias": "model-00002-of-00003.safetensors",
674
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wo.weight": "model-00002-of-00003.safetensors",
675
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq.bias": "model-00002-of-00003.safetensors",
676
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wq.weight": "model-00002-of-00003.safetensors",
677
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv.bias": "model-00002-of-00003.safetensors",
678
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention.wv.weight": "model-00002-of-00003.safetensors",
679
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm.bias": "model-00002-of-00003.safetensors",
680
+ "model.vision_backbone.image_vit.transformer.resblocks.8.attention_norm.weight": "model-00002-of-00003.safetensors",
681
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
682
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
683
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
684
+ "model.vision_backbone.image_vit.transformer.resblocks.8.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
685
+ "model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm.bias": "model-00002-of-00003.safetensors",
686
+ "model.vision_backbone.image_vit.transformer.resblocks.8.ffn_norm.weight": "model-00002-of-00003.safetensors",
687
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk.bias": "model-00002-of-00003.safetensors",
688
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wk.weight": "model-00002-of-00003.safetensors",
689
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo.bias": "model-00002-of-00003.safetensors",
690
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wo.weight": "model-00002-of-00003.safetensors",
691
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq.bias": "model-00002-of-00003.safetensors",
692
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wq.weight": "model-00002-of-00003.safetensors",
693
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv.bias": "model-00002-of-00003.safetensors",
694
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention.wv.weight": "model-00002-of-00003.safetensors",
695
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm.bias": "model-00002-of-00003.safetensors",
696
+ "model.vision_backbone.image_vit.transformer.resblocks.9.attention_norm.weight": "model-00002-of-00003.safetensors",
697
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1.bias": "model-00002-of-00003.safetensors",
698
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w1.weight": "model-00002-of-00003.safetensors",
699
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2.bias": "model-00002-of-00003.safetensors",
700
+ "model.vision_backbone.image_vit.transformer.resblocks.9.feed_forward.w2.weight": "model-00002-of-00003.safetensors",
701
+ "model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm.bias": "model-00002-of-00003.safetensors",
702
+ "model.vision_backbone.image_vit.transformer.resblocks.9.ffn_norm.weight": "model-00002-of-00003.safetensors",
703
+ "model.vision_backbone.pad_embed": "model-00002-of-00003.safetensors"
704
+ }
705
+ }
modeling_molmo.py ADDED
@@ -0,0 +1,2367 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import math
3
+ from copy import deepcopy
4
+ from dataclasses import fields, dataclass, replace
5
+ from enum import Enum
6
+ from typing import List, Optional, Tuple, Union, Dict, Any, Sequence, Callable, cast, MutableMapping
7
+
8
+ import torch
9
+ from einops import einsum, einops
10
+ from transformers import PreTrainedModel, GenerationConfig
11
+ from transformers.cache_utils import Cache
12
+ from transformers.modeling_outputs import CausalLMOutputWithPast, ModelOutput
13
+ from transformers.models.auto import AutoModelForCausalLM
14
+ from torch import nn
15
+
16
+ from .config_molmo import MolmoConfig
17
+ from torch.nn import functional as F
18
+
19
+
20
+ log = logging.getLogger(__name__)
21
+
22
+
23
class BufferCache(dict, MutableMapping[str, torch.Tensor]):
    """
    Dictionary-backed store for attention biases and similar tensors that would
    otherwise be registered as module buffers.

    Real buffers are avoided because they have caused various problems under FSDP,
    whose buffer handling does not appear to be well-defined: buffers are not sharded,
    yet they apparently are synchronized across processes. That synchronization is
    undesirable because (A) it is unnecessary, and (B) some of these biases contain
    `-inf`, which might become NaN during synchronization due to casting or some
    other issue.
    """
32
+
33
+
34
class StrEnum(str, Enum):
    """Enum whose members are also ``str`` instances and print as their raw value."""

    def __str__(self) -> str:
        # Show the underlying value rather than ``ClassName.member``.
        return self.value

    def __repr__(self) -> str:
        # The value wrapped in single quotes.
        return "'{}'".format(str(self))
40
+
41
+
42
class ImageProjectType(StrEnum):
    """String-valued identifiers for the available image projector variants."""

    mlp = "mlp"
    # NOTE: the member is named ``mlpx2`` but its string value is "2mlp".
    mlpx2 = "2mlp"
    linear = "linear"
46
+
47
+
48
class ImagePooling2DType(StrEnum):
    """String-valued identifiers for the available 2D image pooling variants."""

    # NOTE: the string values mix hyphens and underscores; they are kept exactly as-is
    # because they are the values compared against configuration strings.
    attention = "attention"
    attention_meanq = "attention-meanq"
    attention_2wide = "attention_2wide"
    attention_v2 = "attention-v2"
    none = "none"
    stack = "stack"
55
+
56
+
57
class ActivationType(StrEnum):
    """String-valued identifiers for the supported activation functions."""

    # Each member's value is identical to its name.
    quick_gelu = "quick_gelu"
    gelu = "gelu"
    gelu_tanh = "gelu_tanh"
    relu = "relu"
    silu = "silu"
    llama_geglu = "llama_geglu"
    llama_geglu_tanh = "llama_geglu_tanh"
    llama_swiglu = "llama_swiglu"
    swiglu = "swiglu"
67
+
68
+
69
def ensure_finite_(x: torch.Tensor, check_neg_inf: bool = True, check_pos_inf: bool = False):
    """
    Clamp infinities in ``x`` in place to the extreme finite values of its dtype.

    When ``check_neg_inf`` is ``True``, every ``float("-inf")`` entry becomes
    ``torch.finfo(x.dtype).min``; when ``check_pos_inf`` is ``True``, every
    ``float("inf")`` entry becomes ``torch.finfo(x.dtype).max``. Nothing is returned.
    """
    if check_neg_inf:
        neg_inf_mask = x == float("-inf")
        lowest = torch.finfo(x.dtype).min
        x.masked_fill_(neg_inf_mask, lowest)
    if check_pos_inf:
        pos_inf_mask = x == float("inf")
        highest = torch.finfo(x.dtype).max
        x.masked_fill_(pos_inf_mask, highest)
78
+
79
+
80
class MolmoConfigurationError(Exception):
    """Raised when configuration options are set in an inconsistent way."""
82
+
83
+
84
+ def _non_meta_init_device(config) -> torch.device:
85
+ if config.init_device is not None and config.init_device != "meta":
86
+ return torch.device(config.init_device)
87
+ else:
88
+ return torch.device("cuda" if torch.cuda.is_available() else "cpu")
89
+
90
+
91
class RotaryEmbedding(nn.Module):
    """
    [Rotary positional embeddings (RoPE)](https://arxiv.org/abs/2104.09864).

    The sin/cos tables are kept in the shared cache passed at construction time
    (keys ``"rope_pos_sin"`` / ``"rope_pos_cos"``) instead of in module buffers.
    """

    def __init__(self, config: MolmoConfig, cache: BufferCache):
        super().__init__()
        self.config = config
        self.__cache = cache
        # Warm up cache.
        warmup_len = config.max_position_embeddings or config.max_sequence_length
        self.get_rotary_embedding(warmup_len, _non_meta_init_device(config))

    def get_rotary_embedding(self, seq_len: int, device: torch.device) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Return ``(sin, cos)`` tables of shape ``(1, 1, seq_len, head_dim)`` on ``device``.

        Cached tables are reused (and moved to ``device`` if needed) whenever they cover
        at least ``seq_len`` positions; otherwise new tables are computed and cached.
        """
        cached_sin = self.__cache.get("rope_pos_sin")
        cached_cos = self.__cache.get("rope_pos_cos")
        cache_is_usable = (
            cached_sin is not None
            and cached_cos is not None
            and cached_sin.shape[-2] >= seq_len
            and cached_cos.shape[-2] >= seq_len
        )
        if cache_is_usable:
            if cached_sin.device != device:
                cached_sin = cached_sin.to(device)
                self.__cache["rope_pos_sin"] = cached_sin
            if cached_cos.device != device:
                cached_cos = cached_cos.to(device)
                self.__cache["rope_pos_cos"] = cached_cos
            return cached_sin[:, :, :seq_len, :], cached_cos[:, :, :seq_len, :]

        # Build the tables with autocast disabled.
        with torch.autocast(device.type, enabled=False):
            dim = self.config.d_model // self.config.n_heads
            exponents = torch.arange(0, dim, 2, device=device, dtype=torch.float) / dim
            inv_freq = 1.0 / (self.config.rope_theta ** exponents)
            seq = torch.arange(seq_len, device=device, dtype=torch.float)
            freqs = torch.einsum("i , j -> i j", seq, inv_freq)
            if self.config.rope_impl == "interleave":
                positions = freqs.repeat_interleave(2, dim=-1)
            else:
                positions = torch.cat((freqs, freqs), dim=-1)
            pos_sin = positions.sin()[None, None, :, :]
            pos_cos = positions.cos()[None, None, :, :]
        self.__cache["rope_pos_sin"] = pos_sin
        self.__cache["rope_pos_cos"] = pos_cos
        return pos_sin, pos_cos

    def rotate_half(self, x: torch.Tensor) -> torch.Tensor:
        """Split the last dim into two halves ``(a, b)`` and return ``(-b, a)`` concatenated."""
        batch, n_heads, seq_len, head_size = x.size()
        halves = x.view(batch, n_heads, seq_len, 2, head_size // 2)
        first, second = halves.unbind(dim=-2)
        return torch.cat((-second, first), dim=-1)

    def rotate_every_two(self, x: torch.Tensor) -> torch.Tensor:
        """Treat the last dim as adjacent pairs ``(a, b)`` and replace each pair with ``(-b, a)``."""
        batch, n_heads, seq_len, head_size = x.size()
        pairs = x.view(batch, n_heads, seq_len, head_size // 2, 2)
        left, right = pairs.unbind(dim=-1)
        rotated = torch.stack((-right, left), dim=-1)
        return rotated.view(batch, n_heads, seq_len, head_size)

    def apply_rotary_pos_emb(self, pos_sin: torch.Tensor, pos_cos: torch.Tensor, t: torch.Tensor) -> torch.Tensor:
        """Rotate ``t`` with the given tables, using the rotation matching ``config.rope_impl``."""
        if self.config.rope_impl == "interleave":
            rotate = self.rotate_every_two
        else:
            rotate = self.rotate_half
        return ((t * pos_cos) + (rotate(t) * pos_sin)).to(t.dtype)

    def forward(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        position_ids: Optional[torch.Tensor] = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """
        Apply rotary embeddings to ``q`` and ``k`` and return them in their original dtypes.

        With ``position_ids`` the tables are gathered per position (query and key lengths
        must then match); without them the last ``query_len`` of ``key_len`` positions are
        used for the queries.
        """
        if self.config.rope_full_precision:
            q_, k_ = q.float(), k.float()
        else:
            q_, k_ = q, k

        has_position_ids = position_ids is not None
        with torch.autocast(q.device.type, enabled=False):
            batch_size = q_.shape[0]
            query_len, key_len = q_.shape[-2], k_.shape[-2]  # could be different if layer_past not None
            if has_position_ids:
                table_len = self.config.max_position_embeddings or self.config.max_sequence_length
            else:
                table_len = key_len
            pos_sin, pos_cos = self.get_rotary_embedding(table_len, q_.device)
            pos_sin = pos_sin.type_as(q_)
            pos_cos = pos_cos.type_as(q_)
            if has_position_ids:
                assert query_len == key_len, "Query and key lengths must be equal when using position IDs."
                sin_shape = (batch_size, 1, key_len, pos_sin.shape[-1])
                pos_sin = pos_sin[0, 0][position_ids].view(sin_shape)
                cos_shape = (batch_size, 1, key_len, pos_cos.shape[-1])
                pos_cos = pos_cos[0, 0][position_ids].view(cos_shape)
            q_start = key_len - query_len
            q_ = self.apply_rotary_pos_emb(
                pos_sin[:, :, q_start:key_len, :],
                pos_cos[:, :, q_start:key_len, :],
                q_,
            )
            k_ = self.apply_rotary_pos_emb(pos_sin, pos_cos, k_)
        return q_.type_as(q), k_.type_as(k)
190
+
191
+
192
class MolmoBlock(nn.Module):
    """
    Base class for transformer blocks: owns the pieces shared by all block
    implementations (dropout, optional q/k norms, activation, output projections,
    rotary embeddings and the attention computation). ``forward`` is left to subclasses.
    """

    def __init__(self, layer_id: int, config: MolmoConfig, cache: BufferCache):
        super().__init__()
        self.layer_id = layer_id
        self.config = config
        self.hidden_size = (
            config.mlp_hidden_size if config.mlp_hidden_size is not None else config.mlp_ratio * config.d_model
        )
        self.__cache = cache
        self._activation_checkpoint_fn = None

        # Residual dropout.
        self.dropout = Dropout(config.residual_dropout)

        # Optional layer norms applied to keys and queries.
        self.k_norm: Optional[LayerNormBase] = None
        self.q_norm: Optional[LayerNormBase] = None
        if config.attention_layer_norm:
            assert config.effective_n_kv_heads is not None
            head_dim = config.d_model // config.n_heads
            self.k_norm = LayerNormBase.build(
                config,
                size=head_dim * config.effective_n_kv_heads,
                elementwise_affine=config.attention_layer_norm_with_affine,
            )
            self.q_norm = LayerNormBase.build(config, elementwise_affine=config.attention_layer_norm_with_affine)

        # The QKV clip coefficient is only well-defined when positive.
        if config.clip_qkv is not None:
            assert config.clip_qkv > 0

        # Activation; its output width must be a whole number.
        self.act = Activation.build(config)
        assert (self.act.output_multiplier * self.hidden_size) % 1 == 0

        # Attention output projection.
        input_dim = config.d_model
        self.attn_out = nn.Linear(
            input_dim,
            config.d_model,
            bias=config.include_bias,
            device=config.init_device,
        )

        # Feed-forward output projection.
        ff_in_features = int(self.act.output_multiplier * self.hidden_size)
        self.ff_out = nn.Linear(
            ff_in_features,
            config.d_model,
            bias=config.include_bias,
            device=config.init_device,
        )
        self.ff_out._is_residual = True  # type: ignore

        # Rotary embeddings.
        if self.config.rope:
            self.rotary_emb = RotaryEmbedding(config, self.__cache)

        # Flash attention is used only when requested AND importable; otherwise the
        # attribute stays ``None`` and the SDPA path is taken.
        self.flash_attn_func = None
        if config.attention_type == "flash":
            try:
                from flash_attn import flash_attn_func  # type: ignore

                self.flash_attn_func = flash_attn_func
            except ModuleNotFoundError:
                pass

    def reset_parameters(self):
        """Re-initialize the optional q/k norms and both output projections."""
        if self.k_norm is not None:
            self.k_norm.reset_parameters()
        if self.q_norm is not None:
            self.q_norm.reset_parameters()
        for module, fan_in in ((self.attn_out, self.config.d_model), (self.ff_out, self.ff_out.in_features)):
            init_weights(
                self.config,
                module,
                d=fan_in,
                layer_id=self.layer_id,
                type_of_module=ModuleType.out_module,
            )

    @classmethod
    def _cast_attn_bias(cls, bias: torch.Tensor, input_dtype: torch.dtype) -> torch.Tensor:
        """
        Cast ``bias`` to the dtype attention will run in and replace any ``-inf`` with the
        dtype's minimum (in place on the tensor that is returned).
        """
        # NOTE: `is_autocast_enabled()` only checks for CUDA autocast, so we use the separate function
        # `is_autocast_cpu_enabled()` for CPU autocast.
        # See https://github.com/pytorch/pytorch/issues/110966.
        device_type = bias.device.type
        target_dtype = input_dtype
        if device_type == "cuda" and torch.is_autocast_enabled():
            target_dtype = torch.get_autocast_gpu_dtype()
        elif device_type == "cpu" and torch.is_autocast_cpu_enabled():
            target_dtype = torch.get_autocast_cpu_dtype()
        if bias.dtype != target_dtype:
            bias = bias.to(target_dtype)
            ensure_finite_(bias, check_neg_inf=True, check_pos_inf=False)
        return bias

    def _scaled_dot_product_attention(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        attn_mask: Optional[torch.Tensor] = None,
        dropout_p: float = 0.0,
        response_dropout_p: float = 0.0,
        is_causal: bool = False,
    ) -> torch.Tensor:
        """
        Scaled dot-product attention over ``q``, ``k``, ``v`` with an optional mask and
        dropout. Uses flash attention when it is available and no mask is given, and
        ``F.scaled_dot_product_attention`` otherwise.

        NOTE: ``response_dropout_p`` is accepted but not used by this implementation.
        """
        if attn_mask is not None:
            attn_mask = attn_mask.to(q.device)

        if self.flash_attn_func is not None and attn_mask is None:
            # flash_attn takes the head and sequence dims swapped relative to our layout.
            flash_out = self.flash_attn_func(
                q.transpose(1, 2), k.transpose(1, 2), v.transpose(1, 2), dropout_p=dropout_p, causal=is_causal
            )
            return flash_out.transpose(1, 2)

        # torch's sdpa doesn't support GQA, so the kv heads are repeated to match the q heads.
        assert k.size(1) == v.size(1)
        num_kv_heads = k.size(1)
        num_q_heads = q.size(1)
        if num_q_heads != num_kv_heads:
            assert num_q_heads % num_kv_heads == 0
            repeats = num_q_heads // num_kv_heads
            k = k.repeat_interleave(repeats, dim=1, output_size=num_q_heads)
            v = v.repeat_interleave(repeats, dim=1, output_size=num_q_heads)

        return F.scaled_dot_product_attention(
            q,
            k,
            v,
            attn_mask=attn_mask,
            dropout_p=dropout_p,
            is_causal=is_causal,
        )

    def attention(
        self,
        q: torch.Tensor,
        k: torch.Tensor,
        v: torch.Tensor,
        attention_bias: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        """
        Run attention on projected ``q``, ``k``, ``v`` and apply the output projection.

        Returns the projected attention output and, when ``use_cache`` is set, the
        ``(key, value)`` pair (including any ``layer_past``) for later reuse.
        """
        B, T, C = q.size()  # batch size, sequence length, d_model
        dtype = k.dtype
        n_heads = self.config.n_heads
        n_kv_heads = self.config.effective_n_kv_heads
        head_size = C // n_heads

        # Optionally apply layer norm to keys and queries.
        if self.q_norm is not None and self.k_norm is not None:
            q = self.q_norm(q).to(dtype=dtype)
            k = self.k_norm(k).to(dtype=dtype)

        # Move the head dim next to the batch dim.
        # q: (B, nh, T, hs); k, v: (B, n_kv_h, T, hs)
        q = q.view(B, T, n_heads, head_size).transpose(1, 2)
        k = k.view(B, T, n_kv_heads, head_size).transpose(1, 2)
        v = v.view(B, T, n_kv_heads, head_size).transpose(1, 2)

        # With position ids, rotary embeddings are applied before the past is prepended.
        if self.config.use_position_ids and self.config.rope:
            q, k = self.rotary_emb(q, k, position_ids=position_ids)

        if layer_past is not None:
            past_key, past_value = layer_past
            k = torch.cat((past_key.to(k.device), k), dim=-2)
            v = torch.cat((past_value.to(v.device), v), dim=-2)

        present = (k, v) if use_cache else None
        query_len, key_len = q.shape[-2], k.shape[-2]  # could be different if layer_past not None

        # Without position ids, rotary embeddings are applied over the full key length.
        if not self.config.use_position_ids and self.config.rope:
            q, k = self.rotary_emb(q, k)

        if attention_bias is not None:
            # Resize and cast attention bias.
            # The bias dtype might not match the dtype the SDP attn function runs in when AMP is
            # enabled. With padding-masked tokens, down-casting the bias to the autocast precision
            # yields -infs, which make the SDP attn function produce NaNs.
            bias_window = attention_bias[:, :, key_len - query_len : key_len, :key_len]
            attention_bias = self._cast_attn_bias(bias_window, dtype)

        # Dropout is only active in training mode.
        if self.training:
            attn_dropout = self.config.attention_dropout
            response_dropout = self.config.response_attention_dropout
        else:
            attn_dropout = 0.0
            response_dropout = 0.0

        # shape: (B, nh, T, hs)
        att = self._scaled_dot_product_attention(
            q,
            k,
            v,
            attn_mask=attention_bias,
            dropout_p=attn_dropout,
            response_dropout_p=response_dropout,
            is_causal=attention_bias is None,
        )

        # Re-assemble all head outputs side-by-side.
        att = att.transpose(1, 2).contiguous().view(B, T, C)

        # Apply output projection.
        return self.attn_out(att), present

    def forward(
        self,
        x: torch.Tensor,
        attention_bias: Optional[torch.FloatTensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        """Must be provided by subclasses."""
        raise NotImplementedError

    @classmethod
    def build(cls, layer_id: int, config: MolmoConfig, cache: BufferCache):
        """Construct the concrete block implementation (always ``MolmoSequentialBlock``)."""
        return MolmoSequentialBlock(layer_id, config, cache)
419
+
420
+
421
class MolmoSequentialBlock(MolmoBlock):
    """
    A typical transformer block whose output is computed as ``MLP(LN(x + Attention(LN(x))))``
    (plus another skip connection). When ``config.norm_after`` is set, the norms are applied
    to the attention / feed-forward outputs instead of their inputs.
    """

    def __init__(self, layer_id: int, config: MolmoConfig, cache: BufferCache):
        super().__init__(layer_id, config, cache)
        # Layer norms.
        self.attn_norm = LayerNorm.build(config)
        self.ff_norm = LayerNorm.build(config)

        # Attention input projection. Projects x -> (q, k, v) with one fused matmul;
        # ``fused_dims`` records how to split the result back apart.
        head_dim = config.d_model // config.n_heads
        kv_dim = config.effective_n_kv_heads * head_dim
        self.fused_dims = (config.d_model, kv_dim, kv_dim)
        self.att_proj = nn.Linear(
            config.d_model,
            sum(self.fused_dims),
            bias=config.include_bias or config.qkv_bias,
            device=config.init_device,
        )
        # Feed-forward input projection.
        self.ff_proj = nn.Linear(
            config.d_model, self.hidden_size, bias=config.include_bias, device=config.init_device
        )

    def reset_parameters(self):
        """Re-initialize the base-class parameters, both norms and both input projections."""
        super().reset_parameters()
        self.attn_norm.reset_parameters()
        self.ff_norm.reset_parameters()
        # NOTE: the standard deviation for these weights does not depend on the layer.
        for projection in (self.att_proj, self.ff_proj):
            init_weights(
                self.config, projection, d=self.config.d_model, layer_id=None, type_of_module=ModuleType.in_module
            )

    def _maybe_checkpoint(self, fn, *args, **kwargs):
        """Call ``fn`` through the activation-checkpoint wrapper when one is installed, else directly."""
        checkpoint_fn = self._activation_checkpoint_fn
        if checkpoint_fn is not None:
            return checkpoint_fn(fn, *args, **kwargs)
        return fn(*args, **kwargs)

    def forward(
        self,
        x: torch.Tensor,
        attention_bias: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        layer_past: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
        use_cache: bool = False,
    ) -> Tuple[torch.Tensor, Optional[Tuple[torch.Tensor, torch.Tensor]]]:
        """
        Run attention and feed-forward sub-layers, each with a residual connection.

        Returns the block output and the attention cache entry produced by ``attention``.
        """
        norm_after = self.config.norm_after

        # Get query, key, value projections.
        # shape:
        #  - for regular attn q, k, v: (batch_size, seq_len, d_model)
        #  - for multi-query attn q: (batch_size, seq_len, d_model)
        #                      k, v: (batch_size, seq_len, d_model // n_heads)
        #  - for group query attn q: (batch_size, seq_len, d_model)
        #                      k, v: (batch_size, seq_len, d_model // n_kv_heads)
        atten_in = x if norm_after else self._maybe_checkpoint(self.attn_norm, x)
        qkv = self.att_proj(atten_in)

        clip = self.config.clip_qkv
        if clip is not None:
            qkv.clamp_(min=-clip, max=clip)

        q, k, v = qkv.split(self.fused_dims, dim=-1)

        # Get attention scores.
        att, cache = self._maybe_checkpoint(  # type: ignore
            self.attention,
            q,
            k,
            v,
            attention_bias,
            position_ids=position_ids,
            layer_past=layer_past,
            use_cache=use_cache,
        )

        if norm_after:
            att = self._maybe_checkpoint(self.attn_norm, att)

        # Add attention scores.
        # shape: (B, T, C)
        x = x + self.dropout(att)

        # Add feed-forward projection.
        # shape: (batch_size, seq_len, d_model)
        og_x = x

        if not norm_after:
            x = self._maybe_checkpoint(self.ff_norm, x)  # type: ignore

        x = self.ff_proj(x)
        x = self._maybe_checkpoint(self.act, x)  # type: ignore
        x = self.ff_out(x)

        if norm_after:
            x = self._maybe_checkpoint(self.ff_norm, x)  # type: ignore

        x = self.dropout(x)
        x = og_x + x

        return x, cache
537
+
538
+
539
class Embedding(nn.Module):
    """
    Embedding table stored as two parameters: ``embedding`` (``num_embeddings`` rows) and
    ``new_embedding`` (``num_new_embeddings`` rows). Lookups index into the two tables
    concatenated along the row dimension, so ids past ``num_embeddings`` hit the new rows.
    """

    def __init__(
        self,
        num_embeddings: int,
        num_new_embeddings: int,
        features: int,
        device: Union[str, torch.device],
        initializer_range: float = 0.02,
        new_embed_initializer_range: float = 0.02,
    ):
        super().__init__()
        self.initializer_range = initializer_range
        self.new_embed_initializer_range = new_embed_initializer_range
        # Both tables start at zero; call ``reset_parameters`` for a random init.
        base_table = torch.zeros(num_embeddings, features, device=device)
        self.embedding = nn.Parameter(base_table)
        extra_table = torch.zeros(num_new_embeddings, features, device=device)
        self.new_embedding = nn.Parameter(extra_table)

    def reset_parameters(self):
        """Draw both tables from zero-mean normals with their respective standard deviations."""
        nn.init.normal_(self.embedding, std=self.initializer_range)
        nn.init.normal_(self.new_embedding, std=self.new_embed_initializer_range)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Look up the ids in ``x`` in the concatenated table."""
        full_table = torch.cat([self.embedding, self.new_embedding], dim=0)
        return F.embedding(x, full_table)
565
+
566
+
567
class Dropout(nn.Dropout):
    """
    ``nn.Dropout`` with optional mask sharing across dimensions.

    When ``broadcast_dims`` is non-empty, a single keep/drop decision is sampled for
    each position along the non-broadcast dims and reused across every listed dim.
    """

    def __init__(
        self,
        p: float = 0.5,
        inplace: bool = False,
        mask_p: float = 0,
        broadcast_dims: Sequence[int] = (),
    ):
        super().__init__(p, inplace)
        self.mask_p = mask_p
        self.broadcast_dims = broadcast_dims

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        """
        :param input: A tensor of shape `(batch_size, seq_len, embed_dim)`
        :returns: ``input`` with dropout applied (``input`` itself when dropout is disabled).
        """
        if self.p == 0.0 and (self.mask_p is None or self.mask_p == 0.0):
            return input

        if self.p > 0. and len(self.broadcast_dims) > 0 and self.training:
            keep_prob = 1.0 - self.p
            if keep_prob <= 0.0:
                # Everything is dropped; avoid the 0/0 the rescaling below would produce.
                return torch.zeros_like(input)
            dropout_shape = list(input.shape)
            for dim in self.broadcast_dims:
                dropout_shape[dim] = 1
            keep = input.new_empty(dropout_shape).bernoulli_(keep_prob)
            # Rescale the compact mask itself: an in-place op on a broadcast (expanded)
            # view is rejected by torch, and the multiply below broadcasts anyway.
            keep.div_(keep_prob)
            return input * keep

        return F.dropout(input, self.p, self.training, self.inplace)
597
+
598
+
599
@dataclass
class VisionBackboneConfig:
    """Hyper-parameters for the image encoder (ViT) backbone."""

    # Input geometry.
    image_default_input_size: Tuple[int, int] = (336, 336)
    image_patch_size: int = 14
    image_pos_patch_size: int = 14

    # Transformer dimensions.
    image_emb_dim: int = 1024
    image_num_heads: int = 16
    image_num_key_value_heads: int = 16
    image_num_layers: int = 24
    image_head_dim: int = 64
    image_mlp_dim: int = 4096
    image_mlp_activations: str = "gelu"
    image_dropout_rate: float = 0.0
    image_num_pos: int = 577
    image_norm_eps: float = 1e-5

    # Regularization / initialization.
    attention_dropout: float = 0.0
    residual_dropout: float = 0.0
    initializer_range: float = 0.02

    fsdp_wrap: bool = False
    resize_mode: str = "default"

    def __post_init__(self):
        # Normalize to a tuple so that e.g. a list passed by the caller behaves the same.
        self.image_default_input_size = tuple(self.image_default_input_size)  # type: ignore[assignment]

    @property
    def image_num_patch(self):
        """``(rows, cols)`` of whole patches that fit in the default input size."""
        height, width = self.image_default_input_size
        patch = self.image_patch_size
        return height // patch, width // patch
627
+
628
+
629
@dataclass
class FullMolmoConfig:
    """Complete set of model hyper-parameters (language model plus optional vision backbone)."""

    d_model: int = 768
    n_heads: int = 12
    n_kv_heads: Optional[int] = None
    qkv_bias: bool = False
    clip_qkv: Optional[float] = None
    n_layers: int = 12
    mlp_ratio: int = 4
    mlp_hidden_size: Optional[int] = None
    activation_type: str = "swiglu"
    block_group_size: int = 1
    rope: bool = True
    rope_full_precision: bool = True
    rope_theta: float = 10000.
    rope_impl: str = "interleave"
    vision_backbone: Optional[VisionBackboneConfig] = None
    attention_type: str = "sdpa"
    float32_attention: bool = True
    attention_dropout: float = 0.1
    response_attention_dropout: float = 0.0
    multi_query_attention: Optional[bool] = None
    attention_layer_norm: bool = False
    residual_dropout: float = 0.1
    embedding_dropout: float = 0.1
    layer_norm_type: str = "default"
    layer_norm_with_affine: bool = True
    layer_norm_eps: Optional[float] = None
    attention_layer_norm_with_affine: bool = True
    max_sequence_length: int = 1024
    max_position_embeddings: Optional[int] = None
    include_bias: bool = True
    bias_for_layer_norm: Optional[bool] = None
    scale_logits: bool = False
    vocab_size: int = 50257
    embedding_size: Optional[int] = 50304
    additional_vocab_size: Optional[int] = None
    new_embedding_init_range: float = 0.02
    weight_tying: bool = True
    pad_token_id: int = -1
    init_device: Optional[str] = None
    init_std: float = 0.02
    init_cutoff_factor: Optional[float] = None
    norm_after: bool = False
    precision: Optional[str] = None
    image_padding_embed: Optional[str] = None
    vit_layers: Tuple = (-1,)
    image_pooling_h: int = 2
    image_pooling_w: int = 2
    image_pooling_2d: str = "attention"
    image_projector: str = "mlp"
    image_feature_dropout: float = 0.0
    initializer_range: float = 0.02
    normalize_input_embeds: bool = False
    use_position_ids: bool = True

    @property
    def effective_n_kv_heads(self) -> int:
        """
        Number of key/value heads implied by ``n_kv_heads`` and ``multi_query_attention``.

        :raises MolmoConfigurationError: if both options are set and disagree.
        """
        if self.n_kv_heads is None:
            # Only ``multi_query_attention`` decides: one shared head, or one per query head.
            if self.multi_query_attention is True:
                return 1
            else:
                return self.n_heads
        else:
            if self.multi_query_attention is None:
                return self.n_kv_heads
            # Both are set: accept only when they describe the same head count.
            if self.multi_query_attention:
                n_kv_heads_should_be = 1
            else:
                n_kv_heads_should_be = self.n_heads
            if self.n_kv_heads == n_kv_heads_should_be:
                return n_kv_heads_should_be
            else:
                raise MolmoConfigurationError(
                    "You can't set `multi_query_attention` and `n_kv_heads` at the same time."
                )

    @property
    def image_num_patch(self):
        """Patch grid of the vision backbone (requires ``vision_backbone`` to be set)."""
        assert self.vision_backbone is not None
        return self.vision_backbone.image_num_patch

    @property
    def image_patch_size(self):
        """Patch size of the vision backbone (requires ``vision_backbone`` to be set)."""
        assert self.vision_backbone is not None
        # Fixed: previously read the misspelled attribute `visoin_backbone`, which
        # raised AttributeError on every access.
        return self.vision_backbone.image_patch_size

    def llm_patches_per_crop(self):
        """``(h, w)`` patch counts per crop after pooling, using ceiling division."""
        h, w = self.image_num_patch
        # Round up in case we need to pad the image features for pooling
        h = (h + self.image_pooling_h - 1) // self.image_pooling_h
        w = (w + self.image_pooling_w - 1) // self.image_pooling_w
        return h, w
722
+
723
+
724
+ def _expand_token(token, batch_size: int):
725
+ return token.view(1, 1, -1).expand(batch_size, -1, -1)
726
+
727
+
728
class ViTMLP(nn.Module):
    """Two-layer feed-forward network of the image encoder: ``w2(act(w1(x)))``."""

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        v_cfg = config.vision_backbone
        emb_dim, mlp_dim = v_cfg.image_emb_dim, v_cfg.image_mlp_dim

        self.w1 = nn.Linear(emb_dim, mlp_dim, bias=True, device=config.init_device)

        # Activation function: built from a copy of the config whose activation type is
        # overridden with the vision backbone's, so the caller's config is left untouched.
        act_cfg = deepcopy(config)
        act_cfg.activation_type = v_cfg.image_mlp_activations
        self.act = Activation.build(act_cfg)

        self.w2 = nn.Linear(mlp_dim, emb_dim, bias=True, device=config.init_device)

    def reset_parameters(self):
        """Truncated-normal init for the weights (std = 1/sqrt(fan_in)), zeros for the biases."""
        v_cfg = self.config.vision_backbone
        w1_std = math.sqrt(1 / v_cfg.image_emb_dim)
        w2_std = math.sqrt(1 / v_cfg.image_mlp_dim)
        nn.init.trunc_normal_(self.w1.weight, std=w1_std, a=-2.0, b=2.0)
        nn.init.trunc_normal_(self.w2.weight, std=w2_std, a=-2.0, b=2.0)
        nn.init.zeros_(self.w1.bias)
        nn.init.zeros_(self.w2.bias)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        hidden = self.act(self.w1(x))
        return self.w2(hidden)
763
+
764
+
765
class ResidualAttentionBlock(nn.Module):
    """One ViT block: layer-normed self-attention and layer-normed MLP, each added back to its input."""

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        vit = config.vision_backbone

        def make_norm() -> nn.LayerNorm:
            # Both norms share the same width, epsilon and device.
            return nn.LayerNorm(
                vit.image_emb_dim,
                eps=vit.image_norm_eps,
                device=config.init_device,
            )

        self.attention = MultiHeadDotProductAttention(config)
        self.feed_forward = ViTMLP(config)
        self.attention_norm = make_norm()
        self.ffn_norm = make_norm()

    def reset_parameters(self):
        """Re-initialise every sub-module of the block."""
        for part in (self.attention, self.feed_forward, self.attention_norm, self.ffn_norm):
            part.reset_parameters()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the attention sub-layer, then the MLP sub-layer, each with a residual connection."""
        attended = self.attention(self.attention_norm(x))
        x = x + attended
        transformed = self.feed_forward(self.ffn_norm(x))
        return x + transformed
795
+
796
+
797
class BlockCollection(nn.Module):
    """Stack of ``image_num_layers`` residual attention blocks that records every layer's output."""

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        # Toggled by VisionTransformer.set_grad_checkpointing().
        self.grad_checkpointing: bool = False

        v_cfg = config.vision_backbone
        self.resblocks = nn.ModuleList([
            ResidualAttentionBlock(config) for _ in range(v_cfg.image_num_layers)
        ])

    def reset_parameters(self):
        """Re-initialise every block."""
        for r in self.resblocks:
            r.reset_parameters()

    def forward(self, x: torch.Tensor) -> List[torch.Tensor]:
        """Run the blocks in order and return the output of each one.

        :param x: input to the first block.
        :returns: list with one tensor per block, in execution order.

        When ``grad_checkpointing`` is enabled each block is run through
        ``torch.utils.checkpoint`` so its activations are recomputed during the
        backward pass. Previously the flag was stored but never used.
        """
        use_checkpoint = self.grad_checkpointing and not torch.jit.is_scripting()
        if use_checkpoint:
            from torch.utils.checkpoint import checkpoint

        hidden_states = []
        for r in self.resblocks:
            if use_checkpoint:
                x = checkpoint(r, x, use_reentrant=False)
            else:
                x = r(x)
            hidden_states.append(x)
        return hidden_states
819
+
820
+
821
class LayerNormFp32(nn.LayerNorm):
    """``nn.LayerNorm`` that computes in float32 and casts the result back to the input dtype."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        input_dtype = x.dtype
        fp32 = torch.float32
        normed = F.layer_norm(
            x.to(fp32),
            self.normalized_shape,
            self.weight.to(fp32),
            self.bias.to(fp32),
            self.eps,
        )
        return normed.to(input_dtype)
827
+
828
+
829
class VisionTransformer(nn.Module):
    """ViT image encoder.

    Flattened pixel patches are linearly embedded, a class token is prepended,
    positional embeddings are added (resized when the patch grid differs from
    the stored grid), the sequence is layer-normed, and the transformer blocks
    are applied. ``forward`` returns the hidden state after every block.
    """

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config

        v_cfg = config.vision_backbone
        # class embeddings and positional embeddings
        self.scale = v_cfg.image_emb_dim ** -0.5
        self.class_embedding = nn.Parameter(
            torch.zeros(v_cfg.image_emb_dim, device=config.init_device),
        )
        self.num_prefix_tokens: int = 1
        self.positional_embedding = nn.Parameter(
            torch.zeros(v_cfg.image_num_pos, v_cfg.image_emb_dim, device=config.init_device),
        )

        image_patch_size = v_cfg.image_patch_size
        self.patch_embedding = nn.Linear(
            image_patch_size * image_patch_size * 3,
            v_cfg.image_emb_dim,
            bias=False,
            device=config.init_device,
        )

        self.pre_ln = LayerNormFp32(
            v_cfg.image_emb_dim,
            eps=v_cfg.image_norm_eps,
        )

        self.transformer = BlockCollection(config)

    @torch.jit.ignore
    def set_grad_checkpointing(self, enable=True):
        """Set the ``grad_checkpointing`` flag on the transformer block stack."""
        self.transformer.grad_checkpointing = enable

    def reset_parameters(self):
        """Re-initialise embeddings, the patch projection, the pre-norm and all blocks."""
        nn.init.normal_(self.class_embedding, std=self.scale)
        nn.init.normal_(self.positional_embedding, std=self.scale)
        nn.init.normal_(self.patch_embedding.weight, std=0.02)
        self.pre_ln.reset_parameters()
        self.transformer.reset_parameters()

    def add_pos_emb(self, x: torch.Tensor, patch_num: Tuple[int, int]) -> torch.Tensor:
        """Add positional embeddings to ``x``.

        :param x: token sequence with the class token at index 0 of dim 1.
        :param patch_num: ``(rows, cols)`` of the patch grid that ``x`` was built from.
        :returns: ``x`` plus the class/patch positional embeddings, in ``x``'s dtype.

        Row 0 of ``positional_embedding`` belongs to the class token; the
        remaining rows form a square grid that is bicubically resized when it
        does not match ``patch_num``.
        """
        cls_emb = self.positional_embedding[0:1]
        pos_emb = self.positional_embedding[1:]

        # The stored patch embeddings are laid out as a square grid.
        grid_side = int(math.sqrt(pos_emb.shape[0]))
        pos_emb = pos_emb.reshape((grid_side, grid_side, pos_emb.shape[1]))

        (patch_num_0, patch_num_1) = patch_num

        if pos_emb.shape[0] != patch_num_0 or pos_emb.shape[1] != patch_num_1:
            # Derived from https://github.com/facebookresearch/mae/blob/main/util/pos_embed.py
            # antialias: default True in jax.image.resize
            pos_emb = pos_emb.unsqueeze(0).permute(0, 3, 1, 2)
            pos_emb = F.interpolate(
                pos_emb, size=(patch_num_0, patch_num_1), mode="bicubic", align_corners=False, antialias=True,
            )
            pos_emb = pos_emb.permute(0, 2, 3, 1).squeeze(0)

        pos_emb = pos_emb.reshape(-1, pos_emb.shape[-1])
        x = x + torch.cat([cls_emb[None, :, :], pos_emb[None, :, :]], dim=1).to(x.dtype)
        return x

    def forward(self, x: torch.Tensor, patch_num: Optional[Tuple[int, int]] = None) -> List[torch.Tensor]:
        """
        : param x: (batch_size, num_patch, n_pixels)
        : param patch_num: (rows, cols) of the patch grid; defaults to
            ``config.vision_backbone.image_num_patch``.
        : returns: the hidden states produced by every transformer block.
        """
        if patch_num is None:
            patch_num = self.config.vision_backbone.image_num_patch

        x = self.patch_embedding(x)

        # class embeddings and positional embeddings
        x = torch.cat([_expand_token(self.class_embedding, x.shape[0]).to(x.dtype), x], dim=1)
        x = self.add_pos_emb(x, patch_num)

        x = self.pre_ln(x)

        hidden_states = self.transformer(x)
        return hidden_states
913
+
914
+
915
class MultiHeadDotProductAttention(nn.Module):
    """Multi-head dot-product attention over image features, with optional grouped key/value heads.

    Sizes come from ``config.vision_backbone``. When ``is_vit_layer`` is false
    and ``config.vit_layers`` is set, the q/k/v projections accept inputs that
    are ``len(config.vit_layers)`` times wider than ``image_emb_dim``.
    """

    def __init__(self, config: FullMolmoConfig, use_bias: bool = True, is_vit_layer: Optional[bool] = True):
        super().__init__()
        self.config = config
        self.use_bias = use_bias

        v_cfg = config.vision_backbone
        self.embed_dim = v_cfg.image_emb_dim
        self.num_heads = v_cfg.image_num_heads
        self.head_dim = v_cfg.image_head_dim
        self.num_key_value_heads = v_cfg.image_num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.initializer_range = v_cfg.initializer_range
        self.is_vit_layer = is_vit_layer

        nlayers = 1 if (is_vit_layer or config.vit_layers is None) else len(config.vit_layers)

        self.wq = nn.Linear(
            nlayers * self.embed_dim,
            self.num_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wk = nn.Linear(
            nlayers * self.embed_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wv = nn.Linear(
            nlayers * self.embed_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wo = nn.Linear(
            self.num_heads * self.head_dim,
            self.embed_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.attention_dropout: Optional[Dropout] = None
        if v_cfg.attention_dropout > 0:
            self.attention_dropout = Dropout(v_cfg.attention_dropout, broadcast_dims=(0, 1))
        self.residual_dropout = Dropout(v_cfg.residual_dropout)

    def reset_parameters(self):
        """Draw all projection weights from N(0, initializer_range) and zero the biases."""
        nn.init.normal_(self.wq.weight, std=self.initializer_range)
        nn.init.normal_(self.wk.weight, std=self.initializer_range)
        nn.init.normal_(self.wv.weight, std=self.initializer_range)
        nn.init.normal_(self.wo.weight, std=self.initializer_range)
        if self.use_bias:
            nn.init.constant_(self.wq.bias, 0)
            nn.init.constant_(self.wk.bias, 0)
            nn.init.constant_(self.wv.bias, 0)
            nn.init.constant_(self.wo.bias, 0)

    def _split_heads(self, hidden_states, num_heads) -> torch.Tensor:
        """Reshape ``(..., seq, num_heads * head_dim)`` to ``(..., seq, num_heads, head_dim)``."""
        return hidden_states.reshape(hidden_states.shape[:2] + (num_heads, self.head_dim))

    def _merge_heads(self, hidden_states) -> torch.Tensor:
        """Flatten the head axes back into a single ``embed_dim`` axis."""
        return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,))

    def forward(self, inputs_q: torch.Tensor, inputs_kv: Optional[torch.Tensor] = None) -> torch.Tensor:
        """Attend from ``inputs_q`` to ``inputs_kv`` (self-attention when ``inputs_kv`` is None).

        :param inputs_q: query-side input.
        :param inputs_kv: key/value-side input; defaults to ``inputs_q``.
        :returns: attended features after the output projection and residual dropout.
        :raises NotImplementedError: for an unknown ``config.attention_type``.
        """
        if inputs_kv is not None:
            inputs_k = inputs_kv
            inputs_v = inputs_kv
        else:
            inputs_k = inputs_q
            inputs_v = inputs_q

        xq, xk, xv = self.wq(inputs_q), self.wk(inputs_k), self.wv(inputs_v)

        xq = self._split_heads(xq, self.num_heads)
        xk = self._split_heads(xk, self.num_key_value_heads)
        xv = self._split_heads(xv, self.num_key_value_heads)

        if self.num_heads != self.num_key_value_heads:
            # Repeat each key/value head so every query head has a partner.
            xk = xk.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)
            xv = xv.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)

        og_dtype = xq.dtype

        if self.config.float32_attention:
            xq = xq.to(torch.float)
            xk = xk.to(torch.float)

        if self.config.attention_type == "direct":
            attn_weights = torch.einsum("...qhd,...khd->...hqk", xq / math.sqrt(xq.size(-1)), xk)
            attn_weights = F.softmax(attn_weights, dim=-1, dtype=torch.float32).to(xq.dtype)
            if self.attention_dropout is not None:
                attn_weights = self.attention_dropout(attn_weights)
            attn_output = torch.einsum("...hqk,...khd->...qhd", attn_weights.to(xv.dtype), xv)

        elif self.config.attention_type == "sdpa":
            if self.config.float32_attention and not torch.is_autocast_enabled():
                xv = xv.to(torch.float32)
            # F.scaled_dot_product_attention applies dropout whenever dropout_p > 0,
            # regardless of module mode, so it must be disabled explicitly in eval.
            dropout_p = self.config.vision_backbone.attention_dropout if self.training else 0.0
            attn_output = F.scaled_dot_product_attention(
                xq.transpose(1, 2).contiguous(),
                xk.transpose(1, 2).contiguous(),
                xv.transpose(1, 2).contiguous(),
                is_causal=False,
                dropout_p=dropout_p,
            ).transpose(1, 2)
        else:
            raise NotImplementedError(self.config.attention_type)
        attn_output = attn_output.to(og_dtype)
        attn_output = self._merge_heads(attn_output)
        attn_output = self.wo(attn_output)
        attn_output = self.residual_dropout(attn_output)

        return attn_output
1028
+
1029
+
1030
class MultiHeadAttentionPool(nn.Module):
    """Attention pooling: a single query attends over a set of key/value tokens.

    The query is selected by ``query``:

    * ``"mean"``     - projection of the mean of the inputs,
    * ``"first"``    - projection of the first input token,
    * ``"constant"`` - projection of a constant vector,
    * ``"vector"``   - a learned query parameter (no ``wq`` projection).

    ``factor`` scales both ``embed_dim`` and ``head_dim`` relative to the
    vision backbone settings.
    """

    def __init__(
        self,
        config: FullMolmoConfig,
        factor: int = 1,
        use_bias: bool = True,
        dropout: bool = True,
        output_layer: bool = True,
        mean_residual: bool = False,
        query: str = "mean",
        is_vit_layer: Optional[bool] = True
    ):
        super().__init__()
        self.config = config
        self.factor = factor
        self.use_bias = use_bias
        self.dropout = dropout
        self.output_layer = output_layer
        self.mean_residual = mean_residual
        self.query = query

        v_cfg = config.vision_backbone
        input_dim = v_cfg.image_emb_dim
        self.embed_dim = v_cfg.image_emb_dim * factor
        self.num_heads = v_cfg.image_num_heads
        self.head_dim = v_cfg.image_head_dim * factor
        self.num_key_value_heads = v_cfg.image_num_key_value_heads
        self.num_key_value_groups = self.num_heads // self.num_key_value_heads
        self.initializer_range = v_cfg.initializer_range

        nlayers = 1 if (is_vit_layer or config.vit_layers is None) else len(config.vit_layers)

        if query != "vector":
            self.wq = nn.Linear(
                nlayers * input_dim,
                self.num_heads * self.head_dim,
                bias=use_bias,
                device=config.init_device,
            )
        self.wk = nn.Linear(
            nlayers * input_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )
        self.wv = nn.Linear(
            nlayers * input_dim,
            self.num_key_value_heads * self.head_dim,
            bias=use_bias,
            device=config.init_device,
        )

        if query == "vector":
            # The learned query is split into `num_heads` heads in forward(), so
            # it must be sized with `num_heads`. It was previously sized with
            # `num_key_value_heads`, which only worked when the two were equal.
            self.attention_query = nn.Parameter(
                torch.zeros(
                    1, self.num_heads * self.head_dim, device=config.init_device,
                ),
            )

        if output_layer:
            self.wo = nn.Linear(
                self.num_heads * self.head_dim,
                self.embed_dim,
                bias=use_bias,
                device=config.init_device,
            )
        self.attention_dropout = Dropout(v_cfg.attention_dropout, broadcast_dims=(0, 1))
        if dropout:
            self.residual_dropout = Dropout(v_cfg.residual_dropout)

    def reset_parameters(self):
        """Draw weights (and the learned query, if any) from N(0, initializer_range); zero the biases."""
        if self.query != "vector":
            nn.init.normal_(self.wq.weight, std=self.initializer_range)
        nn.init.normal_(self.wk.weight, std=self.initializer_range)
        nn.init.normal_(self.wv.weight, std=self.initializer_range)
        if self.output_layer:
            nn.init.normal_(self.wo.weight, std=self.initializer_range)
        if self.use_bias:
            if self.query != "vector":
                nn.init.constant_(self.wq.bias, 0)
            nn.init.constant_(self.wk.bias, 0)
            nn.init.constant_(self.wv.bias, 0)
            if self.output_layer:
                nn.init.constant_(self.wo.bias, 0)
        if self.query == "vector":
            nn.init.normal_(self.attention_query, std=self.initializer_range)

    def _split_heads(self, hidden_states, num_heads):
        """Reshape ``(..., seq, num_heads * head_dim)`` to ``(..., seq, num_heads, head_dim)``."""
        return hidden_states.reshape(hidden_states.shape[:2] + (num_heads, self.head_dim))

    def _merge_heads(self, hidden_states):
        """Flatten the head axes back into a single ``embed_dim`` axis."""
        return hidden_states.reshape(hidden_states.shape[:2] + (self.embed_dim,))

    def forward(self, inputs_kv: torch.Tensor) -> torch.Tensor:
        """Pool ``inputs_kv`` along dim 1 into a single token per batch element.

        :param inputs_kv: tokens to pool; dim 0 is the batch and dim 1 the token axis.
        :returns: pooled features with a length-1 token axis.
        :raises ValueError: if ``self.query`` is not a known query type.
        """
        xk, xv = self.wk(inputs_kv), self.wv(inputs_kv)

        if self.query == "mean":
            inputs_q = inputs_kv.mean(dim=1, keepdim=True)
            xq = self.wq(inputs_q)
        elif self.query == "first":
            inputs_q = inputs_kv[:, :1]
            xq = self.wq(inputs_q)
        elif self.query == "vector":
            xq = self.attention_query.expand(inputs_kv.size(0), -1, -1)
        elif self.query == "constant":
            inputs_q = torch.ones_like(inputs_kv[:, :1]) / math.sqrt(inputs_kv.shape[-1])
            xq = self.wq(inputs_q)
        else:
            raise ValueError(f"Unknown query type: {self.query}")

        xq = self._split_heads(xq, self.num_heads)
        xk = self._split_heads(xk, self.num_key_value_heads)
        xv = self._split_heads(xv, self.num_key_value_heads)

        if self.num_heads != self.num_key_value_heads:
            # Repeat each key/value head so every query head has a partner.
            xk = xk.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)
            xv = xv.repeat_interleave(self.num_key_value_groups, dim=2, output_size=self.num_heads)

        # Attention scores are always computed in float32.
        xq = xq.to(torch.float)
        xk = xk.to(torch.float)

        xq = xq / math.sqrt(xq.size(-1))
        attn_weights = torch.einsum("...qhd,...khd->...hqk", xq, xk)

        attn_weights = F.softmax(attn_weights, dim=-1).to(xq.dtype)

        attn_weights = self.attention_dropout(attn_weights).to(xv.dtype)

        attn_output = torch.einsum("...hqk,...khd->...qhd", attn_weights, xv)
        attn_output = self._merge_heads(attn_output)
        if self.output_layer:
            attn_output = self.wo(attn_output)
        if self.dropout:
            attn_output = self.residual_dropout(attn_output)
        if self.mean_residual:
            attn_output += inputs_kv.mean(dim=1, keepdim=True)

        return attn_output
1169
+
1170
+
1171
class MLP(nn.Module):
    """Gated MLP projector: ``w2(act(w1(x), w3(x)))`` followed by dropout.

    ``w1`` and ``w3`` map ``input_dim`` to half the hidden size and ``w2`` maps
    that back to ``config.d_model``. The activation is called with two inputs.
    """

    def __init__(self, config: FullMolmoConfig, input_dim: int, dropout: float = 0.0):
        super().__init__()
        self.config = config
        if config.mlp_hidden_size is not None:
            self.hidden_size = config.mlp_hidden_size
        else:
            self.hidden_size = config.mlp_ratio * config.d_model
        self.initializer_range = config.initializer_range

        half_hidden = self.hidden_size // 2
        linear_kwargs = dict(bias=False, device=config.init_device)

        # Created in the order w1, w2, w3.
        self.w1 = nn.Linear(input_dim, half_hidden, **linear_kwargs)
        self.w2 = nn.Linear(half_hidden, config.d_model, **linear_kwargs)
        self.w3 = nn.Linear(input_dim, half_hidden, **linear_kwargs)
        # Activation function.
        self.act = Activation.build(config)
        self.dropout = Dropout(dropout)

    def reset_parameters(self):
        """Draw all three weight matrices from N(0, initializer_range)."""
        for layer in (self.w1, self.w2, self.w3):
            nn.init.normal_(layer.weight, std=self.initializer_range)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        first = self.w1(x)
        second = self.w3(x)
        projected = self.w2(self.act(first, second))
        return self.dropout(projected)
1211
+
1212
+
1213
class Residual(nn.Module):
    """Wrap a module so its output is added to its input: ``x + submodule(x)``."""

    def __init__(self, submodule: nn.Module):
        super().__init__()
        self.submodule = submodule

    def reset_parameters(self):
        """Delegate re-initialisation to the wrapped module."""
        self.submodule.reset_parameters()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        delta = self.submodule(x)
        return x + delta
1223
+
1224
+
1225
class OLMoVisionBackbone(nn.Module):
    """Base vision backbone: ViT encoder, optional 2D pooling layer and a projector to ``d_model``.

    Sub-classes implement ``forward``. ``input_dim`` is the feature width fed
    to the projector; it depends on the pooling type and on how many ViT
    layers are concatenated (``config.vit_layers``).
    """

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config
        self.image_vit = VisionTransformer(config)

        input_dim: Optional[int] = None
        self.image_pooling_2d: Optional[nn.Module] = None
        if config.image_pooling_2d in {ImagePooling2DType.attention, ImagePooling2DType.attention_meanq}:
            self.image_pooling_2d = MultiHeadDotProductAttention(config, is_vit_layer=False)
            input_dim = config.vision_backbone.image_emb_dim
        elif config.image_pooling_2d == ImagePooling2DType.attention_2wide:
            # Pool with an attention layer that is twice as wide as the ViT.
            cfg = deepcopy(config)
            cfg.vision_backbone.image_emb_dim *= 2
            cfg.vision_backbone.image_head_dim *= 2
            self.image_pooling_2d = MultiHeadDotProductAttention(cfg, is_vit_layer=False)
            input_dim = cfg.vision_backbone.image_emb_dim
        elif config.image_pooling_2d == ImagePooling2DType.attention_v2:
            assert config.vit_layers is not None
            use_bias = True
            dropout = True
            output_layer = True
            query = "mean"
            mean_residual = False
            factor = len(config.vit_layers)
            self.image_pooling_2d = MultiHeadAttentionPool(
                config,
                factor=factor,
                use_bias=use_bias,
                dropout=dropout,
                output_layer=output_layer,
                mean_residual=mean_residual,
                query=query,
                is_vit_layer=False,
            )
            input_dim = config.vision_backbone.image_emb_dim * factor
        elif config.image_pooling_2d in [ImagePooling2DType.none, ImagePooling2DType.stack]:
            self.image_pooling_2d = None
            nlayers = 1 if config.vit_layers is None else len(config.vit_layers)
            input_dim = nlayers * config.vision_backbone.image_emb_dim
        else:
            raise NotImplementedError(f"Unknown image pooling 2D method: {config.image_pooling_2d}")

        self.input_dim = input_dim

        # `MLP` assume the activation takes two inputs, so it must be a 'llama' version
        if config.activation_type == ActivationType.swiglu:
            mlp_config = replace(config, activation_type=ActivationType.llama_swiglu)
        elif config.activation_type == ActivationType.gelu:
            mlp_config = replace(config, activation_type=ActivationType.llama_geglu)
        else:
            mlp_config = config
        if config.image_projector == ImageProjectType.mlpx2:
            # nn.Sequential keeps the same parameter names as the former
            # nn.ModuleList ("0.*", "1.*") but, unlike ModuleList, is callable,
            # which is what sub-class forward() passes rely on. The residual MLP
            # consumes the first MLP's output, so its input width is `d_model`,
            # and it needs the two-input activation config like any other `MLP`.
            self.image_projector = nn.Sequential(
                MLP(mlp_config, input_dim),
                Residual(MLP(mlp_config, config.d_model)),
            )
        elif config.image_projector == ImageProjectType.mlp:
            self.image_projector = MLP(mlp_config, input_dim)
        elif config.image_projector == ImageProjectType.linear:
            self.image_projector = nn.Linear(
                input_dim,
                config.d_model,
                bias=False,
                device=config.init_device,
            )
        else:
            raise NotImplementedError(f"Unknown image projector: {config.image_projector}")

        self.image_feature_dropout = Dropout(config.image_feature_dropout)

    def reset_parameters(self):
        """Re-initialise the pooling layer (if any) and the projector."""
        if self.image_pooling_2d is not None:
            self.image_pooling_2d.reset_parameters()
        # Compare against the enum members, exactly as __init__ does.
        if self.config.image_projector == ImageProjectType.mlpx2:
            for module in self.image_projector:
                module.reset_parameters()
        elif self.config.image_projector == ImageProjectType.linear:
            nn.init.xavier_uniform_(self.image_projector.weight)
        else:
            self.image_projector.reset_parameters()

    def forward(self, images: torch.Tensor, image_masks: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Abstract; implemented by sub-classes."""
        raise NotImplementedError
1308
+
1309
+
1310
class OLMoPretrainedVisionBackbone(OLMoVisionBackbone):
    """Vision backbone that encodes image crops with the ViT, pools patches and projects them.

    Optionally adds learned padding embeddings to patches according to
    ``image_masks`` (``config.image_padding_embed``).
    """

    def __init__(self, config: FullMolmoConfig):
        super().__init__(config)
        v_cfg = self.config.vision_backbone
        self.grad_checkpointing = False

        self.num_prefix_tokens = self.image_vit.num_prefix_tokens
        assert self.num_prefix_tokens in {0, 1}, "Only 0 or 1 prefix tokens are supported"

        self.pad_embed = None
        if config.image_padding_embed:
            # encode_image() falls back to the last ViT layer when `vit_layers`
            # is None, so the feature width is a single embedding in that case.
            n_feature_layers = 1 if self.config.vit_layers is None else len(self.config.vit_layers)
            image_dim = v_cfg.image_emb_dim * n_feature_layers
            if config.image_padding_embed in ["pad_embed", "regress"]:
                self.pad_embed = nn.Parameter(
                    torch.zeros((image_dim,), device=config.init_device))
            elif config.image_padding_embed == "pad_and_partial_pad":
                self.pad_embed = nn.Parameter(
                    torch.zeros((2, image_dim), device=config.init_device))
            else:
                raise ValueError(config.image_padding_embed)

    def reset_parameters(self):
        """Re-initialise the pooling/projector layers and the ViT."""
        super().reset_parameters()
        self.image_vit.reset_parameters()

    def encode_image(self, images: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """
        : param images: (batch_size, num_crops, num_patch, n_pixels)
        : returns: ``(image_features, cls_embed)``. ``image_features`` is
            (batch_size, num_crops, num_patch, feature_dim); ``cls_embed`` is
            (batch_size, num_crops, feature_dim), or None when the ViT has no
            prefix token.
        """
        cfg = self.config
        B, T, N, D = images.shape

        # A crop whose pixels are all -1 is treated as empty; its features are zeroed below.
        mask = ~torch.all(images.view(B * T, N, D) == -1, dim=(1, 2), keepdim=True)

        # Output all hidden states
        # n_layers x (batch_num_crops, (1+)n_tokens, image_emb_dim)
        images = images.view(B * T, N, D)
        image_features = self.image_vit(images)

        if cfg.vit_layers is not None:
            # Concatenate the selected ViT layers along the feature axis.
            features = []
            for layer in cfg.vit_layers:
                features.append(image_features[layer])
            image_features = torch.cat(features, dim=-1)
        else:
            image_features = image_features[-1]

        cls_embed: Optional[torch.Tensor] = None
        if self.num_prefix_tokens > 0:
            cls_embed = image_features[:, 0]
            image_features = image_features[:, 1:]

        image_features = image_features * mask
        image_features = image_features.view(B, T, N, -1)

        cls_embed = cls_embed.view(B, T, -1) if cls_embed is not None else None

        return image_features, cls_embed

    def forward(self, images: torch.Tensor, image_masks: torch.Tensor) -> Tuple[torch.Tensor, Optional[torch.Tensor]]:
        """Encode, pool and project image crops.

        :param images: (batch_size, num_crops, num_patch, n_pixels)
        :param image_masks: per-patch mask, required when ``config.image_padding_embed``
            is set; a value of 0 selects the padding embedding.
        :returns: ``(image_features, cls_embed)``; see the comment at the end of
            the method for shapes.
        :raises ValueError: for an unknown ``config.image_padding_embed``.
        """
        cfg = self.config

        # image_features: (batch_size, num_crops(=num_image), num_patch, nximage_emb_dim)
        batch_size, num_image = images.shape[:2]
        image_features, cls_embed = self.encode_image(images)

        if cfg.image_padding_embed:
            assert image_masks is not None
            # All mask weights are cast to the feature dtype so that adding the
            # padding embedding never silently promotes low-precision features.
            if cfg.image_padding_embed == "pad_embed":
                all_pad = (image_masks == 0).to(dtype=image_features.dtype)
                pad_embed = self.pad_embed[None, None, None, :]
                image_features = image_features + pad_embed * torch.unsqueeze(all_pad, -1)
            elif cfg.image_padding_embed == "regress":
                pad_embed = self.pad_embed[None, None, None, :]
                pad_weight = torch.maximum(image_masks, torch.zeros_like(image_masks))
                pad_weight = pad_weight.to(dtype=image_features.dtype)
                image_features = image_features + pad_embed * torch.unsqueeze(pad_weight, -1)
            elif cfg.image_padding_embed == "pad_and_partial_pad":
                pad_embed = self.pad_embed[:, None, None, None, :]
                all_pad = image_masks == 0
                partial_pad = torch.logical_and(image_masks < 1, torch.logical_not(all_pad)).to(dtype=image_features.dtype)
                all_pad = all_pad.to(dtype=image_features.dtype)
                image_features = image_features + pad_embed[0] * torch.unsqueeze(all_pad, -1)
                image_features = image_features + pad_embed[1] * torch.unsqueeze(partial_pad, -1)
            else:
                raise ValueError(cfg.image_padding_embed)

        image_features = self.image_feature_dropout(image_features)
        if cls_embed is not None:
            cls_embed = self.image_feature_dropout(cls_embed)

        num_patch_h, num_patch_w = cfg.image_num_patch
        image_features = image_features.reshape(
            (batch_size, num_image) + (num_patch_h, num_patch_w) + (-1,),
        )

        # Pad each spatial axis up to a multiple of its pooling window. This
        # matches the round-up in `llm_patches_per_crop`; the previous code only
        # handled a height remainder of 1 and always padded both axes by one.
        pad_h = -num_patch_h % cfg.image_pooling_h
        pad_w = -num_patch_w % cfg.image_pooling_w
        if pad_h or pad_w:
            image_features = F.pad(
                image_features,
                (0, 0, 0, pad_w, 0, pad_h, 0, 0, 0, 0),
            )

        # image pooling
        image_features = einops.rearrange(
            image_features,
            'b n (h dh) (w dw) c -> (b n h w) (dh dw) c',
            dh=cfg.image_pooling_h,
            dw=cfg.image_pooling_w,
        )

        if self.grad_checkpointing:
            from torch.utils.checkpoint import checkpoint

        if cfg.image_pooling_2d == ImagePooling2DType.attention_meanq:
            query = image_features.mean(-2, keepdim=True)
            image_features = self.image_pooling_2d(query, image_features)
        elif cfg.image_pooling_2d == ImagePooling2DType.attention_v2:
            # MultiHeadAttentionPool builds its own query and accepts only the
            # key/value input; passing a separate query would raise TypeError.
            if self.grad_checkpointing:
                image_features = checkpoint(self.image_pooling_2d, image_features, use_reentrant=False)
            else:
                image_features = self.image_pooling_2d(image_features)
        elif cfg.image_pooling_2d not in {ImagePooling2DType.none, ImagePooling2DType.stack}:
            # The first patch of every pooling window is used as the query.
            if self.grad_checkpointing:
                image_features = checkpoint(self.image_pooling_2d, image_features[:, :1, :], image_features, use_reentrant=False)
            else:
                image_features = self.image_pooling_2d(image_features[:, :1, :], image_features)

        h, w = cfg.llm_patches_per_crop()
        image_features = image_features.reshape(batch_size, num_image, h * w, -1)

        # MLP layer to map the feature.
        if self.grad_checkpointing:
            image_features = checkpoint(self.image_projector, image_features, use_reentrant=False)
        else:
            image_features = self.image_projector(image_features)

        # image_features: (batch_size, num_image, num_patch, d_model)
        # cls_embed: (batch_size, num_image, feature_dim) -- the ViT class-token
        # features; they are not passed through the projector.
        return image_features, cls_embed
1442
+
1443
+
1444
class ModuleType(str, Enum):
    """String-valued tags naming the kind of module being initialised."""

    # Each member compares equal to its string value.
    in_module = "in"
    out_module = "out"
    emb = "emb"
    final_out = "final_out"
1449
+
1450
+
1451
def init_weights(
    config: FullMolmoConfig,
    module: Union[nn.Linear, nn.Embedding],
    d: Optional[int] = None,
    layer_id: Optional[int] = None,
    std_factor: float = 1.0,
    type_of_module: Optional[ModuleType] = None,
) -> None:
    """Initialise ``module.weight`` in place from a zero-mean normal distribution.

    The standard deviation is ``config.init_std * std_factor``. When
    ``config.init_cutoff_factor`` is set, a truncated normal clipped at
    ``+/- init_cutoff_factor * std`` is used instead of a plain normal.

    ``d``, ``layer_id`` and ``type_of_module`` are accepted but do not affect
    the result in this implementation.
    """
    if d is None:
        # Resolved as in the original code path, although it is not used below.
        d = config.d_model
    std = config.init_std * std_factor
    cutoff_factor = config.init_cutoff_factor
    if cutoff_factor is None:
        nn.init.normal_(module.weight, mean=0.0, std=std)
        return
    bound = cutoff_factor * std
    nn.init.trunc_normal_(module.weight, mean=0.0, std=std, a=-bound, b=bound)
1466
+
1467
+
1468
class LlamaSwiGLU(nn.Module):
    """Two-input SwiGLU: ``silu(x1) * x2``."""

    def forward(self, x1: torch.Tensor, x2: torch.Tensor) -> torch.Tensor:
        activated = F.silu(x1)
        return activated * x2

    @property
    def output_multiplier(self) -> float:
        return 0.5
1475
+
1476
+
1477
class SwiGLU(nn.Module):
    """Single-input SwiGLU: the last dim is split in two halves, ``silu(second) * first``."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        value, gate = x.chunk(2, dim=-1)
        activated_gate = F.silu(gate)
        return activated_gate * value

    @property
    def output_multiplier(self) -> float:
        return 0.5
1485
+
1486
+
1487
class Activation(nn.Module):
    """Base class for activations plus a factory keyed on ``config.activation_type``."""

    def __init__(self, config: FullMolmoConfig):
        super().__init__()
        self.config = config

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        raise NotImplementedError

    @property
    def output_multiplier(self) -> float:
        raise NotImplementedError

    @classmethod
    def build(cls, config: FullMolmoConfig) -> 'Activation':
        """Return the activation module selected by ``config.activation_type``.

        Raises ``NotImplementedError`` for an unrecognised type.
        """
        kind = config.activation_type
        if kind == "quick_gelu":
            return QuickGELU(config)
        if kind == "gelu":
            return cast(Activation, GELU(approximate="none"))
        if kind == "gelu_tanh":
            return cast(Activation, GELU(approximate="tanh"))
        if kind == "relu":
            return cast(Activation, ReLU(inplace=False))
        if kind == "silu":
            return cast(Activation, SiLU(inplace=False))
        # "llama_geglu" and "llama_geglu_tanh" are not handled here.
        if kind == "llama_swiglu":
            return LlamaSwiGLU()
        if kind == "swiglu":
            return SwiGLU()
        raise NotImplementedError(f"Unknown activation: '{config.activation_type}'")
1521
+
1522
+
1523
class QuickGELU(Activation):
    """Sigmoid-gated activation: ``x * sigmoid(1.702 * x)``."""

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        gate = torch.sigmoid(1.702 * x)
        return x * gate

    @property
    def output_multiplier(self) -> float:
        return 1.0
1530
+
1531
+
1532
class GELU(nn.GELU):
    """``nn.GELU`` extended with an ``output_multiplier`` property of 1.0."""

    @property
    def output_multiplier(self) -> float:
        multiplier = 1.0
        return multiplier
1536
+
1537
+
1538
class ReLU(nn.ReLU):
    """``nn.ReLU`` extended with an ``output_multiplier`` property of 1.0."""

    @property
    def output_multiplier(self) -> float:
        multiplier = 1.0
        return multiplier
1542
+
1543
+
1544
class SiLU(nn.SiLU):
    """``nn.SiLU`` extended with an ``output_multiplier`` property of 1.0."""

    @property
    def output_multiplier(self) -> float:
        multiplier = 1.0
        return multiplier
1548
+
1549
+
1550
def causal_attention_bias(seq_len: int, device: torch.device) -> torch.FloatTensor:
    """Build an additive causal mask of shape ``(1, 1, seq_len, seq_len)``.

    Entries strictly above the diagonal hold the most negative float32 value;
    the diagonal and everything below it are zero.
    """
    most_negative = torch.finfo(torch.float).min
    filled = torch.full((seq_len, seq_len), most_negative, device=device, dtype=torch.float)
    # Keep only the strictly-upper triangle; everything else becomes zero.
    bias = torch.triu(filled, diagonal=1)
    return bias.view(1, 1, seq_len, seq_len)  # type: ignore
1557
+
1558
+
1559
def get_causal_attention_bias(cache: BufferCache, seq_len: int, device: torch.device) -> torch.Tensor:
    """Return a causal bias covering at least ``seq_len`` positions, using ``cache``.

    A cached bias is reused when its last dimension is at least ``seq_len``
    (moving it to ``device`` and re-caching it if necessary). Otherwise a new
    bias is built with autocast disabled and stored under
    ``"causal_attention_bias"``.
    """
    cached = cache.get("causal_attention_bias")
    if cached is not None and cached.shape[-1] >= seq_len:
        if cached.device != device:
            cached = cached.to(device)
            cache["causal_attention_bias"] = cached
        return cached

    with torch.autocast(device.type, enabled=False):
        fresh = causal_attention_bias(seq_len, device)
    cache["causal_attention_bias"] = fresh
    return fresh
1569
+
1570
+
1571
class LayerNormBase(nn.Module):
    """Shared setup for the layer-norm variants: epsilon, normalized shape and optional affine parameters."""

    def __init__(
        self,
        config: MolmoConfig,
        *,
        size: Optional[int] = None,
        elementwise_affine: Optional[bool] = True,
        eps: float = 1e-05,
        weight_initializer: Optional[Callable] = torch.ones,
        bias_initializer: Optional[Callable] = torch.zeros,
    ):
        super().__init__()
        self.config = config
        # A truthy config-level epsilon overrides the argument.
        self.eps = self.config.layer_norm_eps or eps
        self.normalized_shape = (size or config.d_model,)

        # `elementwise_affine=None` defers to the config setting.
        affine = elementwise_affine or (elementwise_affine is None and self.config.layer_norm_with_affine)
        if not affine:
            self.register_parameter("bias", None)
            self.register_parameter("weight", None)
            return

        self.weight = nn.Parameter(weight_initializer(self.normalized_shape, device=config.init_device))
        with_bias = self.config.bias_for_layer_norm
        if with_bias is None:
            # Fall back to the model-wide bias setting.
            with_bias = self.config.include_bias
        if with_bias:
            self.bias = nn.Parameter(bias_initializer(self.normalized_shape, device=config.init_device))
        else:
            self.register_parameter("bias", None)

    @classmethod
    def build(cls, config: FullMolmoConfig, size: Optional[int] = None, **kwargs):
        """Instantiate the layer norm selected by ``config.layer_norm_type``.

        Raises ``NotImplementedError`` for an unrecognised type.
        """
        kind = config.layer_norm_type
        if kind == "default":
            return LayerNorm(config, size=size, low_precision=False, **kwargs)
        if kind == "low_precision":
            return LayerNorm(config, size=size, low_precision=True, **kwargs)
        if kind == "rms":
            return RMSLayerNorm(config, size=size, **kwargs)
        raise NotImplementedError(f"Unknown LayerNorm type: '{config.layer_norm_type}'")
1609
+
1610
+
1611
class RMSLayerNorm(LayerNormBase):
    """
    Root-mean-square layer norm: scales by the RMS over the last dim without
    subtracting the mean, then applies the optional weight and bias.
    """

    def __init__(
        self,
        config: FullMolmoConfig,
        size: Optional[int] = None,
        elementwise_affine: Optional[bool] = None,
        eps: float = 1e-5,
    ):
        super().__init__(config, size=size, elementwise_affine=elementwise_affine, eps=eps)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # The normalisation runs in float32 with autocast disabled, and the
        # result is cast back to the input dtype before the affine step.
        with torch.autocast(enabled=False, device_type=x.device.type):
            input_dtype = x.dtype
            x = x.to(torch.float32)
            mean_square = x.pow(2).mean(-1, keepdim=True)
            x = x * torch.rsqrt(mean_square + self.eps)
            x = x.to(input_dtype)

        if self.weight is None:
            return x
        if self.bias is None:
            return self.weight * x
        return self.weight * x + self.bias
1640
+
1641
+
1642
class LayerNorm(LayerNormBase):
    """
    The default :class:`LayerNorm` implementation which can optionally run in low precision.
    """

    def __init__(
        self,
        config: FullMolmoConfig,
        size: Optional[int] = None,
        low_precision: bool = False,
        elementwise_affine: Optional[bool] = None,
        eps: float = 1e-05,
    ):
        """
        :param config: Model config, forwarded to :class:`LayerNormBase`.
        :param size: Size of the normalized dimension, forwarded to :class:`LayerNormBase`.
        :param low_precision: If ``True``, inputs and parameters are cast to the active
            autocast dtype and the norm is computed with autocast disabled.
        :param elementwise_affine: Forwarded to :class:`LayerNormBase`.
        :param eps: Forwarded to :class:`LayerNormBase`.
        """
        super().__init__(config, size=size, elementwise_affine=elementwise_affine, eps=eps)
        self.low_precision = low_precision

    def _cast_if_autocast_enabled(self, tensor: torch.Tensor, dtype: Optional[torch.dtype] = None) -> torch.Tensor:
        """
        Cast ``tensor`` to the active autocast dtype (or ``dtype`` if given).

        Returns ``tensor`` unchanged when autocast is not enabled for its device type.
        Only CUDA and CPU tensors are considered; tensors on other devices pass through.
        """
        # NOTE: newer torch releases also offer device-generic variants of these queries;
        # the per-device functions are used here for compatibility with older releases.
        if tensor.device.type == "cuda" and torch.is_autocast_enabled():
            return tensor.to(dtype=dtype if dtype is not None else torch.get_autocast_gpu_dtype())
        elif tensor.device.type == "cpu" and torch.is_autocast_cpu_enabled():
            return tensor.to(dtype=dtype if dtype is not None else torch.get_autocast_cpu_dtype())
        else:
            return tensor

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply layer normalization over the last dimension of ``x``."""
        if self.low_precision:
            module_device = x.device
            downcast_x = self._cast_if_autocast_enabled(x)
            downcast_weight = (
                self._cast_if_autocast_enabled(self.weight) if self.weight is not None else self.weight
            )
            downcast_bias = self._cast_if_autocast_enabled(self.bias) if self.bias is not None else self.bias
            # Autocast is disabled so F.layer_norm runs in the dtype we just cast to.
            with torch.autocast(enabled=False, device_type=module_device.type):
                return F.layer_norm(
                    downcast_x, self.normalized_shape, weight=downcast_weight, bias=downcast_bias, eps=self.eps
                )
        else:
            return F.layer_norm(x, self.normalized_shape, weight=self.weight, bias=self.bias, eps=self.eps)
1672
+
1673
+
1674
class Molmo(nn.Module):
    """
    Decoder-only transformer with an optional vision backbone.

    Token embeddings (optionally summed with image patch features) are passed through the
    transformer blocks, a final layer norm and an output projection to produce logits.
    """

    def __init__(self, config: FullMolmoConfig, init_params: bool = True):
        """
        :param config: Full model configuration.
        :param init_params: Accepted for interface compatibility; it is not read here.
        :raises MolmoConfigurationError: If ``embedding_size`` is smaller than ``vocab_size``.
        :raises NotImplementedError: If ``config.block_group_size > 1``.
        """
        super().__init__()
        self.config = config
        self.__cache = BufferCache()

        # Validate config.
        if self.config.embedding_size is not None and self.config.embedding_size != self.config.vocab_size:
            if self.config.embedding_size < self.config.vocab_size:
                raise MolmoConfigurationError("embedding size should be at least as big as vocab size")
            elif self.config.embedding_size % 128 != 0:
                import warnings

                warnings.warn(
                    "Embedding size is not a multiple of 128! This could hurt throughput performance.", UserWarning
                )
        torch.backends.cuda.enable_flash_sdp(True)
        torch.backends.cuda.enable_mem_efficient_sdp(False)  # this is super slow so make sure torch won't use it

        wte = None
        if self.config.additional_vocab_size is not None:
            # Project `Embedding` that also holds embeddings for the additional vocabulary.
            wte = Embedding(
                config.embedding_size or config.vocab_size,
                config.additional_vocab_size,
                config.d_model,
                device=config.init_device,
                initializer_range=config.initializer_range,
                new_embed_initializer_range=config.new_embedding_init_range
            )
        else:
            wte = nn.Embedding(
                config.embedding_size or config.vocab_size, config.d_model, device=config.init_device
            )

        self.transformer = nn.ModuleDict(
            dict(
                wte=wte,
                emb_drop=Dropout(config.embedding_dropout),
                ln_f=LayerNorm.build(config),
            )
        )

        blocks = [MolmoBlock.build(i, config, self.__cache) for i in range(config.n_layers)]
        if self.config.block_group_size > 1:
            raise NotImplementedError()
        else:
            self.transformer.update({"blocks": nn.ModuleList(blocks)})

        if not self.config.rope:
            # Learned absolute position embeddings are only needed without RoPE.
            self.transformer.update(
                {"wpe": nn.Embedding(config.max_sequence_length, config.d_model, device=config.init_device)}
            )
        if not config.weight_tying:
            self.transformer.update(
                {
                    "ff_out": nn.Linear(
                        config.d_model,
                        config.embedding_size or config.vocab_size,
                        bias=config.include_bias,
                        device=config.init_device,
                    )
                }
            )

        self.vision_backbone: Optional[OLMoVisionBackbone] = None
        if config.vision_backbone is not None:
            self.vision_backbone = OLMoPretrainedVisionBackbone(config)

        self.__num_fwd_flops: Optional[int] = None

    def reset_parameters(self):
        """Re-initialize the vision backbone (if any) and all non-vision parameters."""
        if self.vision_backbone is not None:
            self.vision_backbone.reset_parameters()
        self.reset_non_vision_parameters()

    def reset_non_vision_parameters(self):
        """Re-initialize embeddings, the final layer norm, the output projection and all blocks."""
        self.transformer.wte.reset_parameters()
        if hasattr(self.transformer.wte, "new_embedding"):
            nn.init.normal_(self.transformer.wte.new_embedding, std=self.config.new_embedding_init_range)

        if hasattr(self.transformer, "wpe"):
            # `nn.init.normal_` operates on tensors, so pass the module's weight, not the module.
            nn.init.normal_(self.transformer.wpe.weight, mean=0.0, std=1.0)

        self.transformer.ln_f.reset_parameters()  # type: ignore

        if hasattr(self.transformer, "ff_out"):
            # Same as above: initialize the weight tensor of the linear layer.
            nn.init.normal_(self.transformer.ff_out.weight, mean=0.0, std=0.02)

        if self.config.block_group_size == 1:
            for block in self.transformer.blocks:
                block.reset_parameters()
        else:
            for block_group in self.transformer.block_groups:
                block_group.reset_parameters()

    def forward(
        self,
        input_ids: torch.LongTensor,
        input_embeddings: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        attention_bias: Optional[torch.Tensor] = None,
        response_mask: Optional[torch.Tensor] = None,
        images: Optional[torch.Tensor] = None,
        image_masks: Optional[torch.Tensor] = None,
        image_input_idx: Optional[torch.Tensor] = None,
        subsegment_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        past_key_values: Optional[Sequence[Tuple[torch.Tensor, torch.Tensor]]] = None,
        use_cache: bool = False,
        last_logits_only: bool = False,
        output_hidden_states: Optional[bool] = None,
        append_last_valid_logits: Optional[torch.Tensor] = None,
    ) -> ModelOutput:
        """
        :param input_ids: A tensor of shape `(batch_size, seq_len)`.
        :param input_embeddings: A tensor of shape `(batch_size, seq_len, d_model)` with input
            embeddings. When provided, it is treated as the output of the input embedding layer.
        :param attention_mask: A tensor of shape `(batch_size, seq_len)` that indicates
            which input IDs are masked. A `1` value in the mask means that
            the corresponding input ID should *not* be ignored. A `0` means
            that the corresponding input ID is masked.

            This has the same meaning as the `attention_mask` in HuggingFace's `transformers`
            library.
        :param attention_bias: A tensor of shape `(batch_size, 1, seq_len, seq_len)`,
            `(1, 1, seq_len, seq_len)`, or `(seq_len, seq_len)`. This is used
            to introduce causal or other biases.

            If the tensor is a bool or byte tensor, a `True` or `1` at `attention_bias[:, :, i, j]`
            indicates that the i-th element in the sequence is allowed to attend to the j-th
            element in the sequence.

            If the tensor is a float tensor, it will just be added to the attention
            scores before the softmax.

            The default is causal, which corresponds to a lower-diagonal byte matrix of ones.
        :param response_mask: A tensor of shape `(batch_size, seq_len)` that indicates
            the response mask. A `1` value in the mask means that the corresponding token
            is a response token. A `0` means that the corresponding token is not
            a response token. It is accepted but not read by this method.
        :param images: Image input handed to the vision backbone together with `image_masks`.
        :param image_masks: Passed through to the vision backbone.
        :param image_input_idx: A tensor of shape `(batch_size, num_image, num_patch)` giving, for
            every image patch, the sequence position its feature is added to. Negative entries
            are skipped.
        :param subsegment_ids: A tensor of shape `(batch_size, seq_len)`. Position `i` may attend to
            position `j` only if `subsegment_ids[i] <= subsegment_ids[j]`. Requires `position_ids`
            and cannot be combined with `use_cache`.
        :param position_ids: Explicit position ids passed to the blocks. When omitted and
            `config.use_position_ids` is set, they are derived from `attention_mask`.
        :param past_key_values: Pre-computed keys and values for each attention block.
            Can be used to speed up sequential decoding. The `input_ids` which have
            their past given to this model should not be passed as `input_ids` as they have already been computed.
        :param use_cache: If `True`, return key and value tensors for each block.
        :param last_logits_only: If `True`, only compute the logits for the last token of each sequence.
            This can speed up decoding when you only care about the next token.
        :param output_hidden_states: If `True`, return the input of every block plus the
            post-final-layernorm hidden state.
        :param append_last_valid_logits: A tensor of shape `(batch_size,)` with the index of the last
            valid position of each sequence. With `last_logits_only` the logits are taken at that
            position; otherwise the logits at that position replace the final time step.
        """
        output_hidden_states = output_hidden_states if output_hidden_states is not None else False

        if past_key_values:
            assert len(past_key_values) == self.config.n_layers

        has_image = images is not None

        assert not (has_image and input_embeddings is not None), "Cannot provide both images and input embeddings."
        assert not (has_image and past_key_values is not None), "Cached key and values should not be used with images."

        batch_size, seq_len = input_ids.size() if input_embeddings is None else input_embeddings.size()[:2]
        if past_key_values is None:
            past_length = 0
        else:
            past_length = past_key_values[0][0].size(-2)

        if self.config.use_position_ids and attention_mask is None:
            # -1 is used as the padding id.
            attention_mask = input_ids != -1

        if subsegment_ids is not None:
            assert not use_cache, "Subsegment_ids cannot be used with cache."
            subsegment_mask = subsegment_ids.unsqueeze(2) <= subsegment_ids.unsqueeze(1)
            # Combine the pairwise subsegment mask with the per-token mask on both axes.
            attention_mask = (
                subsegment_mask.to(attention_mask.dtype) *
                attention_mask.unsqueeze(2) *
                attention_mask.unsqueeze(1))
            if position_ids is None:
                raise ValueError(f"Positioned ids must be given if using subsegment_ids")
        else:
            if self.config.use_position_ids and position_ids is None:
                position_ids = torch.clamp(
                    torch.cumsum(attention_mask.to(torch.int32), dim=-1) - 1,
                    min=0,
                ).broadcast_to((batch_size, attention_mask.shape[-1]))

        # Get embeddings of input.
        # shape: (batch_size, seq_len, d_model)
        if input_ids is not None:
            # Map the -1 padding id to 0 so the embedding lookup stays in range.
            input_ids = input_ids * (input_ids != -1).to(input_ids.dtype)
        x = self.transformer.wte(input_ids) if input_embeddings is None else input_embeddings  # type: ignore

        num_image: Optional[int] = None
        if images is not None:
            # shape: (batch_size, num_image, num_patch, d_model)
            # cls_embed: (batch_size, num_image, d_model)
            image_features, cls_embed = self.vision_backbone(images, image_masks)
            num_image, num_patch = image_features.shape[1:3]
            assert image_input_idx.shape == (batch_size, num_image, num_patch)

            # insert the image feature into the embedding.
            image_features = image_features.view(batch_size, num_image * num_patch, -1)
            image_input_idx = image_input_idx.view(batch_size, num_image * num_patch)

            valid = image_input_idx >= 0
            batch_idx = torch.arange(batch_size, device=x.device)
            batch_idx = torch.tile(batch_idx[:, None], [1, image_features.shape[1]])

            # For hf demo/endpoint
            image_features = image_features.to(x.device)

            x[batch_idx[valid], image_input_idx[valid]] += image_features[valid]

        if not self.config.rope:
            # Get positional embeddings.
            # shape: (1, seq_len)
            pos = torch.arange(past_length, past_length + seq_len, dtype=torch.long, device=x.device).unsqueeze(0)
            # shape: (1, seq_len, d_model)
            pos_emb = self.transformer.wpe(pos)  # type: ignore
            x = pos_emb + x

        # Add input + positional embeddings and apply dropout.
        # shape: (batch_size, seq_len, d_model)
        x = self.transformer.emb_drop(x)  # type: ignore

        # normalized
        if self.config.normalize_input_embeds:
            x = x * (self.config.d_model ** 0.5)

        # Transform the attention mask into what the blocks expect.
        if attention_mask is not None:
            # shape: (batch_size, 1, 1, seq_len)
            if len(attention_mask.shape) == 2:
                attention_mask = attention_mask[:, :past_length + seq_len]
                attention_mask = attention_mask.to(dtype=torch.float).view(batch_size, -1)[:, None, None, :]
            else:
                attention_mask = attention_mask.unsqueeze(1).to(dtype=torch.float)
            attention_mask = (1.0 - attention_mask) * torch.finfo(attention_mask.dtype).min

        # Merge attention mask with attention bias.
        if (
            attention_bias is not None
            or attention_mask is not None
            # NOTE (epwalsh): we need to initialize the attn bias in order for attn to work properly
            # with key+value cache. Otherwise `F.scaled_dot_product_attention()` doesn't seem to compute
            # scores correctly.
            or past_key_values is not None
        ):
            if attention_bias is None:
                attention_bias = get_causal_attention_bias(self.__cache, past_length + seq_len, x.device)
            elif attention_bias.dtype in (torch.int8, torch.bool):
                attention_bias = attention_bias.to(dtype=torch.float)
                attention_bias.masked_fill_(attention_bias == 0.0, torch.finfo(attention_bias.dtype).min)

            # Transform to the right shape and data type.
            mask_len = seq_len
            if attention_mask is not None:
                mask_len = attention_mask.shape[-1]
            elif past_key_values is not None:
                mask_len = past_key_values[0][0].shape[-2] + seq_len
            attention_bias = attention_bias[:, :, :mask_len, :mask_len].to(dtype=torch.float)

            # Add in the masking bias.
            if attention_mask is not None:
                attention_bias = attention_bias + attention_mask
                # Might get -infs after adding attention mask, since dtype.min + dtype.min = -inf.
                # `F.scaled_dot_product_attention()` doesn't handle -inf like you'd expect, instead
                # it can produce NaNs.
                ensure_finite_(attention_bias, check_neg_inf=True, check_pos_inf=False)

        attn_key_values: Optional[List[Tuple[torch.Tensor, torch.Tensor]]] = [] if use_cache else None

        # decoder layers
        all_hidden_states = []

        # Apply blocks one-by-one.
        if self.config.block_group_size == 1:
            for block_idx, block in enumerate(self.transformer.blocks):
                if output_hidden_states:
                    # add hidden states
                    all_hidden_states.append(x)

                layer_past = None if past_key_values is None else past_key_values[block_idx]
                x, cache = block(x, attention_bias=attention_bias, position_ids=position_ids, layer_past=layer_past, use_cache=use_cache)

                if attn_key_values is not None:
                    assert cache is not None
                    attn_key_values.append(cache)
        else:
            for group_idx, block_group in enumerate(self.transformer.block_groups):
                if output_hidden_states:
                    # add hidden states
                    all_hidden_states.append(x)

                layers_past = (
                    None
                    if past_key_values is None
                    else past_key_values[
                        group_idx * self.config.block_group_size : (group_idx + 1) * self.config.block_group_size
                    ]
                )
                x, cache = block_group(
                    x, attention_bias=attention_bias, position_ids=position_ids, layers_past=layers_past, use_cache=use_cache
                )
                if attn_key_values is not None:
                    assert cache is not None
                    attn_key_values.extend(cache)

        if last_logits_only:
            # shape: (batch_size, 1, d_model)
            if append_last_valid_logits is not None:
                last_valid_output = x[
                    torch.arange(x.shape[0], device=x.device), append_last_valid_logits.to(x.device)]
                x = last_valid_output.unsqueeze(1)
            else:
                x = x[:, -1, :].unsqueeze(1)

        # Apply final layer norm.
        # shape: (batch_size, seq_len or 1, d_model)
        x = self.transformer.ln_f(x)  # type: ignore
        if output_hidden_states:
            # add final hidden state post-final-layernorm, following HuggingFace's convention
            all_hidden_states.append(x)

        # Get logits.
        # shape: (batch_size, seq_len or 1, vocab_size)
        if self.config.weight_tying:
            logits = F.linear(x, self.transformer.wte.weight, None)  # type: ignore
        else:
            logits = self.transformer.ff_out(x)  # type: ignore
        if self.config.scale_logits:
            logits.mul_(1 / math.sqrt(self.config.d_model))

        if not last_logits_only and append_last_valid_logits is not None:
            # Move the index tensor to the logits' device, mirroring the `last_logits_only`
            # path above; indexing with a tensor on another device fails.
            last_valid_logit = logits[
                torch.arange(logits.shape[0], device=logits.device), append_last_valid_logits.to(logits.device)]
            logits = torch.cat([logits[:, :-1], last_valid_logit[:, None]], dim=1)

        return ModelOutput(logits=logits, attn_key_values=attn_key_values, hidden_states=tuple(all_hidden_states) if output_hidden_states else None)  # type: ignore[arg-type]
2010
+
2011
+
2012
class MolmoForCausalLM(PreTrainedModel):
    """HuggingFace `PreTrainedModel` wrapper that exposes :class:`Molmo` as a causal LM."""

    config_class = MolmoConfig
    base_model_prefix = "model"
    _no_split_modules = ["MolmoBlock"]

    def __init__(self, config: MolmoConfig, model: Optional[Molmo] = None, init_params: bool = False):
        """
        :param config: HuggingFace-style configuration.
        :param model: An already constructed :class:`Molmo`. When omitted, one is built from
            `config` together with the fixed vision/backbone settings below.
        :param init_params: Forwarded to :class:`Molmo` when it is constructed here.
        """
        super().__init__(config)

        if model is None:
            full_config = FullMolmoConfig(
                image_padding_embed="pad_and_partial_pad",
                image_pooling_2d="attention-meanq",
                attention_layer_norm=config.attention_layer_norm,
                rope_impl="llama",
                vocab_size=config.vocab_size,
                max_sequence_length=config.max_position_embeddings,
                qkv_bias=config.qkv_bias,
                norm_after=config.norm_after,
                embedding_size=config.embedding_size,
                attention_type="sdpa",
                embedding_dropout=0,
                attention_dropout=0,
                residual_dropout=0,
                rope=True,
                weight_tying=False,
                include_bias=False,
                d_model=config.hidden_size,
                mlp_hidden_size=config.intermediate_size,
                n_layers=config.num_hidden_layers,
                additional_vocab_size=128,
                n_heads=config.num_attention_heads,
                n_kv_heads=config.num_key_value_heads,
                rope_theta=config.rope_theta,
                layer_norm_eps=config.layer_norm_eps,
                layer_norm_type=config.layer_norm_type,
                vit_layers=[-2, -9],
                vision_backbone=VisionBackboneConfig(
                    image_default_input_size=(336, 336),
                    image_patch_size=14,
                    image_pos_patch_size=14,
                    image_emb_dim=1024,
                    image_num_heads=16,
                    image_num_key_value_heads=16,
                    image_num_layers=23,
                    image_head_dim=64,
                    image_mlp_dim=4096,
                    image_mlp_activations="quick_gelu",
                    image_dropout_rate=0.0,
                    image_num_pos=577,
                    image_norm_eps=1e-5,
                    attention_dropout=0.0,
                    residual_dropout=0.0,
                    initializer_range=0.02,
                )
            )
            self.model = Molmo(full_config, init_params=init_params)
        else:
            self.model = model

    def forward(
        self,
        input_ids: torch.LongTensor = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        attention_bias: Optional[torch.Tensor] = None,
        response_mask: Optional[torch.Tensor] = None,
        images: Optional[torch.Tensor] = None,
        image_masks: Optional[torch.Tensor] = None,
        image_input_idx: Optional[torch.Tensor] = None,
        subsegment_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        labels: Optional[torch.LongTensor] = None,
        loss_masks: Optional[torch.Tensor] = None,
        use_cache: Optional[bool] = None,
        last_logits_only: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        append_last_valid_logits: Optional[torch.Tensor] = None,
        return_dict: Optional[bool] = None,
        cache_position: Optional[
            Cache
        ] = None,  # This is a hack mitigation of an issue in transformers `4.39.x` https://github.com/huggingface/transformers/issues/29426
    ) -> Union[Tuple, CausalLMOutputWithPast]:
        """
        Run the wrapped model and optionally compute a language-modeling loss.

        Most arguments are forwarded unchanged to `Molmo.forward` (`inputs_embeds` is passed
        as `input_embeddings`).

        :param labels: Target token ids. With `loss_masks`, the loss is computed position by
            position without shifting; without it, logits and labels are shifted by one so
            that position `t` predicts token `t + 1`.
        :param loss_masks: Per-token loss weights; non-positive entries are ignored. The loss is
            normalized by the sum of the positive weights (at least 1).
        :param output_attentions: Not supported.
        :param return_dict: Return a `CausalLMOutputWithPast` instead of a tuple. Defaults to
            `config.use_return_dict`.
        :param cache_position: Accepted for compatibility with `transformers`; not used.
        :raises ValueError: If `output_attentions` is truthy.
        """
        if use_cache is None:
            use_cache = self.config.use_cache

        if output_attentions:
            raise ValueError("output_attentions is not yet supported in Molmo")

        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        # decoder outputs consists of (dec_features, layer_state, dec_hidden, dec_attn)
        outputs = self.model.forward(
            input_ids=input_ids,
            input_embeddings=inputs_embeds,
            attention_mask=attention_mask,
            attention_bias=attention_bias,
            response_mask=response_mask,
            images=images,
            image_masks=image_masks,
            image_input_idx=image_input_idx,
            subsegment_ids=subsegment_ids,
            position_ids=position_ids,
            past_key_values=past_key_values,
            use_cache=use_cache,
            last_logits_only=last_logits_only,
            output_hidden_states=output_hidden_states,
            append_last_valid_logits=append_last_valid_logits,
        )

        logits = outputs.logits
        hidden_states = outputs.hidden_states

        loss = None
        if labels is not None:
            if loss_masks is not None:
                loss_masks = loss_masks * (loss_masks > 0)
                batch_size_in_tokens = max(loss_masks.sum().item(), 1)
                # Out-of-place fill: `.long()` returns the same tensor for int64 input, so an
                # in-place fill would silently overwrite the caller's labels.
                labels = labels.long().masked_fill(~(loss_masks > 0), -100)
                labels = labels.view(-1)
                logits_for_loss = logits.to(torch.float32).view(-1, logits.size(-1))
                loss_fct = torch.nn.CrossEntropyLoss(ignore_index=-100, reduction='none')
                loss = loss_fct(logits_for_loss, labels)
                # Use the logits' batch size so this also works when only `inputs_embeds` is given.
                loss = loss.view(logits.shape[0], -1)
                loss = loss * loss_masks
                loss = loss.sum() / batch_size_in_tokens
                use_zloss = getattr(self.config, "softmax_auxiliary_loss", False)
                if use_zloss:
                    z_squared = logits_for_loss.logsumexp(-1).pow(2)
                    z_loss = self.config.softmax_auxiliary_loss_scale * z_squared
                    z_loss = z_loss.view(logits.shape[0], -1)
                    z_loss = z_loss * loss_masks
                    z_loss = z_loss.sum() / batch_size_in_tokens
                    loss += z_loss
            else:
                # Shift so that tokens < n predict n
                shift_logits = logits[..., :-1, :].contiguous()
                shift_labels = labels[..., 1:].contiguous()
                # Flatten the tokens
                loss_fct = torch.nn.CrossEntropyLoss()
                # Flatten by the logits' actual width: `config.embedding_size` may be unset
                # or differ from the output projection's size.
                shift_logits = shift_logits.view(-1, shift_logits.size(-1))
                shift_labels = shift_labels.view(-1)
                # Enable model parallelism
                shift_labels = shift_labels.to(shift_logits.device)
                loss = loss_fct(shift_logits, shift_labels)

        if not return_dict:
            output = (logits,) + outputs[1:]
            return (loss,) + output if loss is not None else output

        return CausalLMOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=outputs.attn_key_values,
            hidden_states=hidden_states,
        )

    def can_generate(self) -> bool:
        """Tell `transformers` that this model supports `generate()`."""
        return True

    @torch.no_grad()
    def generate_from_batch(
        self,
        batch: Dict[str, Any],
        generation_config: Optional[GenerationConfig] = None,
        **kwargs,
    ):
        """
        Generate from a pre-processed batch.

        :param batch: Must contain `input_ids`; may contain `attention_mask`, `images`,
            `image_masks` and `image_input_idx`.
        :param generation_config: Generation settings. Required: `max_new_tokens` must be set and
            `use_cache` must be enabled.
        :param kwargs: Extra keyword arguments forwarded to `generate()`.
        :raises ValueError: If `generation_config` is not provided.
        """
        if generation_config is None:
            # `max_new_tokens` is read from it below, so fail early with a clear message.
            raise ValueError("generate_from_batch requires a generation_config with max_new_tokens set")
        assert generation_config.use_cache

        images = batch.get("images")
        image_masks = batch.get("image_masks")
        image_input_idx = batch.get("image_input_idx")

        # Validate inputs.
        input_ids = batch["input_ids"]
        batch_size, seq_len = input_ids.shape
        attention_mask = batch.get("attention_mask", None)
        max_new_tokens = generation_config.max_new_tokens
        assert max_new_tokens is not None
        mask_len = seq_len + max_new_tokens if self.config.use_position_ids else seq_len
        position_ids: Optional[torch.Tensor] = None
        append_last_valid_logits: Optional[torch.Tensor] = None
        if self.config.use_position_ids and attention_mask is None:
            attention_mask = input_ids != -1
            position_ids = torch.clamp(
                torch.cumsum(attention_mask.to(torch.int32), dim=-1) - 1,
                min=0
            )
            # Index of the last non-padding token of every sequence.
            append_last_valid_logits = attention_mask.long().sum(dim=-1) - 1
            # Extend the mask to cover the tokens that will be generated.
            attention_mask = torch.cat(
                [attention_mask, attention_mask.new_ones((batch_size, max_new_tokens))],
                dim=1,
            )
        if attention_mask is not None:
            assert attention_mask.shape == (batch_size, mask_len)

        out = super().generate(
            batch["input_ids"],
            generation_config,
            attention_mask=attention_mask,
            images=images,
            image_masks=image_masks,
            image_input_idx=image_input_idx,
            position_ids=position_ids,
            append_last_valid_logits=append_last_valid_logits,
            **kwargs,
        )

        return out

    def prepare_inputs_for_generation(
        self, input_ids: torch.LongTensor, past_key_values: Optional[List[Tuple]] = None, **kwargs
    ):
        """
        Build the keyword arguments for one `forward` call during generation.

        Image inputs and `append_last_valid_logits` are only passed on the first step,
        i.e. while there is no key/value cache yet.
        """
        if past_key_values:
            # This is because we want the model to only process the last generated token.
            input_ids = input_ids[:, -1:]

        if self.config.use_position_ids:
            attention_mask = kwargs.get("attention_mask")
            images = kwargs.get("images")
            image_masks = kwargs.get("image_masks")
            image_input_idx = kwargs.get("image_input_idx")
            position_ids = kwargs.get("position_ids")
            append_last_valid_logits = kwargs.get("append_last_valid_logits")
            model_inputs = {
                "input_ids": input_ids,
                "attention_mask": attention_mask,
                "position_ids": position_ids,
                "past_key_values": past_key_values,
                "use_cache": True,
                "last_logits_only": True,
            }
            if past_key_values is None:
                model_inputs["images"] = images
                model_inputs["image_masks"] = image_masks
                model_inputs["image_input_idx"] = image_input_idx
                model_inputs["append_last_valid_logits"] = append_last_valid_logits
        else:
            model_inputs = {"input_ids": input_ids, "past_key_values": past_key_values}

            model_inputs.update(kwargs)
            model_inputs["use_cache"] = kwargs.pop("use_cache", self.config.use_cache)
        return model_inputs

    def _update_model_kwargs_for_generation(
        self,
        outputs: ModelOutput,
        model_kwargs: Dict[str, Any],
        is_encoder_decoder: bool = False,
        num_new_tokens: int = 1,
    ) -> Dict[str, Any]:
        """
        Advance the generation state after a decoding step.

        Moves `position_ids` forward by one, drops the inputs that are only needed on the
        first step, stores the new key/value cache and advances `cache_position`.
        """
        if self.config.use_position_ids:
            model_kwargs["position_ids"] = model_kwargs["position_ids"][:, -1:] + 1
            if "append_last_valid_logits" in model_kwargs:
                del model_kwargs["append_last_valid_logits"]
        if "images" in model_kwargs:
            del model_kwargs["images"]
            # The companion keys may be absent; `pop` avoids a KeyError in that case.
            model_kwargs.pop("image_masks", None)
            model_kwargs.pop("image_input_idx", None)
        cache_name, cache = super()._extract_past_from_model_output(outputs)
        model_kwargs[cache_name] = cache
        model_kwargs["cache_position"] = model_kwargs["cache_position"][-1:] + num_new_tokens
        return model_kwargs

    def get_input_embeddings(self) -> torch.nn.Module:
        """Return the token embedding module."""
        return self.model.transformer.wte

    def set_input_embeddings(self, value: torch.nn.Module):
        """Replace the token embedding module."""
        self.model.transformer.wte = value

    def get_output_embeddings(self):
        """Return the module producing logits: `wte` when weights are tied, else `ff_out`."""
        if self.config.weight_tying:
            return self.model.transformer.wte
        else:
            return self.model.transformer.ff_out

    def set_output_embeddings(self, value: torch.nn.Module):
        """Replace the module producing logits: `wte` when weights are tied, else `ff_out`."""
        if self.config.weight_tying:
            self.model.transformer.wte = value
        else:
            self.model.transformer.ff_out = value

    def tie_weights(self):
        """
        This function is intentionally left as a no-op.

        Weight tying is handled as follows:
        - When the model is initialized, the `ff_out` layer is conditionally defined based on the `weight_tying` configuration.
        See: `if not config.weight_tying: self.transformer.update(...)` in `olmo/model.py`.
        - When computing logits, the `wte` weights are used directly if `weight_tying` is enabled.
        See: `if self.config.weight_tying: logits = F.linear(x, self.transformer.wte.weight, None)` in the `forward` method.

        Therefore, there is no need to explicitly tie the weights in this function.
        """
        pass

    def resize_token_embeddings(
        self, new_num_tokens: Optional[int] = None, pad_to_multiple_of: Optional[int] = None
    ) -> torch.nn.Embedding:
        """
        Resizes input token embeddings matrix of the model if `new_num_tokens != config.embedding_size`.

        Takes care of tying weights embeddings afterwards if the model class has a `tie_weights()` method.

        Arguments:
            new_num_tokens (`int`, *optional*):
                The new number of tokens in the embedding matrix. Increasing the size will add newly initialized
                vectors at the end. Reducing the size will remove vectors from the end. If not provided or `None`, just
                returns a pointer to the input tokens `torch.nn.Embedding` module of the model without doing anything.
            pad_to_multiple_of (`int`, *optional*):
                If set will pad the embedding matrix to a multiple of the provided value. If `new_num_tokens` is set to
                `None` will just pad the embedding to a multiple of `pad_to_multiple_of`.

                This is especially useful to enable the use of Tensor Cores on NVIDIA hardware with compute capability
                `>= 7.5` (Volta), or on TPUs which benefit from having sequence lengths be a multiple of 128. For more
                details about this, or help on choosing the correct value for resizing, refer to this guide:
                https://docs.nvidia.com/deeplearning/performance/dl-performance-matrix-multiplication/index.html#requirements-tc

        Return:
            `torch.nn.Embedding`: Pointer to the input tokens Embeddings Module of the model.

        Note:
            This method differs from the base class implementation by resizing the `embedding_size` attribute of the
            model configuration instead of the `vocab_size`. It also includes a warning if the resized `embedding_size`
            is less than the `vocab_size`. In OLMo, `embedding_size` refers to the dimensionality of the model's token
            embeddings, while `vocab_size` refers to the number of unique tokens in the vocabulary.
        """
        model_embeds = self._resize_token_embeddings(new_num_tokens, pad_to_multiple_of)
        if new_num_tokens is None and pad_to_multiple_of is None:
            return model_embeds

        # Update base model and current model config
        self.config.embedding_size = model_embeds.weight.shape[0]
        self.model.config.embedding_size = model_embeds.weight.shape[0]

        # Check if the embedding size is less than the vocab size
        if self.config.embedding_size < self.config.vocab_size:
            warning_message = (
                f"Resizing token embeddings to size {self.config.embedding_size}, which is less than the vocab size "
                f"{self.config.vocab_size} defined in the model configuration. Make sure your tokenizer's vocabulary "
                "size is less than or equal to the new token embedding size."
            )
            log.warning(warning_message)

        # Tie weights again if needed
        self.tie_weights()

        return model_embeds
2364
+
2365
+
2366
+ # Always register for multi-modal features
2367
+ AutoModelForCausalLM.register(MolmoConfig, MolmoForCausalLM)
preprocessing_molmo.py ADDED
@@ -0,0 +1,192 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Processor class for Molmo.
3
+ """
4
+
5
+ from typing import Optional
6
+
7
+ import PIL
8
+ from PIL import ImageOps
9
+ from PIL.Image import Image
10
+
11
+ try:
12
+ from typing import Unpack
13
+ except ImportError:
14
+ from typing_extensions import Unpack
15
+
16
+ import numpy as np
17
+ import torch
18
+
19
+ from transformers.image_utils import ImageInput
20
+ from transformers.processing_utils import (
21
+ TextKwargs,
22
+ ProcessingKwargs,
23
+ ProcessorMixin,
24
+ )
25
+
26
+ from transformers.tokenization_utils_base import TextInput, PreTokenizedInput
27
+ from transformers.utils import logging
28
+
29
+ from transformers import AutoTokenizer
30
+ from .image_preprocessing_molmo import MolmoImagesKwargs, MolmoImageProcessor
31
+
32
+
33
+ logger = logging.get_logger(__name__)
34
+
35
+
36
# Literal text of the special tokens the processor looks up in the tokenizer.
# They are plain strings (no interpolation), so no f-string prefix is needed.
DEFAULT_IMAGE_PATCH_TOKEN = "<im_patch>"
DEFAULT_IM_START_TOKEN = "<im_start>"
DEFAULT_IM_END_TOKEN = "<im_end>"
DEFAULT_IM_COL_TOKEN = "<im_col>"
IMAGE_PROMPT = "<|image|>"

# Order matters: get_special_token_ids() encodes these concatenated in this order and
# zips the resulting ids back onto the same tuple.
EXTRA_TOKENS = (
    DEFAULT_IM_START_TOKEN,
    DEFAULT_IM_END_TOKEN,
    DEFAULT_IMAGE_PATCH_TOKEN,
    DEFAULT_IM_COL_TOKEN,
    IMAGE_PROMPT,
)
43
+
44
+
45
def get_special_token_ids(tokenizer):
    """Return a dict mapping every string in ``EXTRA_TOKENS`` to its token id.

    All extra tokens are concatenated into one string and encoded in a single call with
    ``add_special_tokens=False``. The assertion checks that the tokenizer produced exactly
    one id per extra token; the ids are then paired with the tokens in ``EXTRA_TOKENS`` order.
    """
    joined_tokens = "".join(EXTRA_TOKENS)
    token_ids = tokenizer.encode(joined_tokens, add_special_tokens=False)
    assert len(token_ids) == len(EXTRA_TOKENS)
    return dict(zip(EXTRA_TOKENS, token_ids))
49
+
50
+
51
class MolmoTextKwargs(TextKwargs, total=False):
    """Text-side keyword arguments for ``MolmoProcessor``, extending ``TextKwargs``."""

    # Not read by the processor code in this file; defaults to "long_caption".
    style: Optional[str]
    # Not read by the processor code in this file; defaults to "none".
    system_prompt: Optional[str]
    # "role" wraps the prompt as "User: ... Assistant:"; "none"/None leaves it unchanged.
    message_format: Optional[str]
    # When true, a single space is prepended to the prompt before tokenization.
    always_start_with_space: Optional[bool]
    # Forwarded unchanged to the image processor's multimodal_preprocess().
    sequence_length: Optional[int]
57
+
58
+
59
class MolmoProcessorKwargs(ProcessingKwargs, total=False):
    """Keyword-argument schema and default values used by ``MolmoProcessor.process``."""

    text_kwargs: MolmoTextKwargs
    images_kwargs: MolmoImagesKwargs

    # Default values for each kwargs group; this class is handed to `_merge_kwargs` in
    # `MolmoProcessor.process`, which presumably layers caller overrides on top of these.
    _defaults = {
        # The merged images_kwargs are splatted into image_processor.multimodal_preprocess().
        "images_kwargs": {
            "max_crops": 12,
            "overlap_margins": [4, 4],
            "base_image_input_size": [336, 336],
            "image_token_length_w": 12,
            "image_token_length_h": 12,
            "image_patch_size": 14,
            "image_padding_mask": True,
        },
        # Prompt formatting / tokenization options (see MolmoTextKwargs).
        "text_kwargs": {
            "style": "long_caption",
            "system_prompt": "none",
            "message_format": "role",
            "always_start_with_space": True,
            "sequence_length": 1536,
            "padding": False,
        },
    }
81
+
82
+
83
class MolmoProcessor(ProcessorMixin):
    """Processor that pairs a Molmo image processor with a Qwen2 tokenizer.

    ``process`` tokenizes a text prompt (unless pre-tokenized ids are supplied), converts any
    images to RGB ``uint8`` arrays, delegates the image/token interleaving to
    ``image_processor.multimodal_preprocess`` and finally prepends a BOS token (or EOS when
    the tokenizer defines no BOS) to the resulting ``input_ids``.
    """

    attributes = ["image_processor", "tokenizer"]
    image_processor_class = "AutoImageProcessor"
    tokenizer_class = ("Qwen2Tokenizer", "Qwen2TokenizerFast")

    def __init__(self, image_processor: MolmoImageProcessor = None, tokenizer: AutoTokenizer = None, **kwargs):
        """Store the two sub-processors via ``ProcessorMixin``; extra ``kwargs`` are accepted and ignored."""
        super().__init__(image_processor, tokenizer)
        # Lazily filled cache for `special_token_ids`.
        self._special_tokens = None

    @property
    def special_token_ids(self):
        """Dict mapping each ``EXTRA_TOKENS`` string to its token id (computed once, then cached)."""
        if self._special_tokens is None:
            self._special_tokens = get_special_token_ids(self.tokenizer)
        return self._special_tokens

    def get_tokens_input(self, prompt, message_format, always_start_with_space):
        """Wrap ``prompt`` in the requested message format and tokenize it.

        Args:
            prompt: Prompt text.
            message_format: ``"role"`` wraps the prompt as ``"User: <prompt> Assistant:"``;
                ``"none"`` or ``None`` leaves it unchanged.
            always_start_with_space: When truthy, a single space is prepended to the
                (possibly wrapped) prompt before tokenization.

        Returns:
            The ids returned by ``tokenizer.encode(..., add_special_tokens=False)``.

        Raises:
            NotImplementedError: If ``message_format`` is any other value.
        """
        if message_format == "none" or message_format is None:
            pass
        elif message_format == "role":
            prompt = "User: " + prompt + " Assistant:"
        else:
            raise NotImplementedError(f"Message format {message_format} not implemented")

        if always_start_with_space:
            prompt = " " + prompt

        tokens = self.tokenizer.encode(prompt, add_special_tokens=False)

        return tokens

    def process(
        self,
        text: TextInput = None,
        images: ImageInput = None,
        *,
        tokens: Optional[PreTokenizedInput] = None,
        **kwargs: Unpack[MolmoProcessorKwargs],
    ):
        """Build model inputs from a prompt and optional images.

        Args:
            text: Prompt text; used only when ``tokens`` is ``None``.
            images: A single image or a list/tuple of images. PIL images are EXIF-transposed
                and converted to RGB; anything else must expose ``.shape`` of rank 3 with a
                trailing dimension of 3 and is cast to ``uint8``.
            tokens: Optional pre-tokenized prompt ids; when given, ``text`` is not tokenized.
            **kwargs: Overrides merged with ``MolmoProcessorKwargs`` via ``_merge_kwargs``.

        Returns:
            The dict produced by ``image_processor.multimodal_preprocess`` with a BOS/EOS id
            prepended to ``"input_ids"``, ``"image_input_idx"`` (if present) shifted to match,
            and every value converted with ``torch.from_numpy``.

        Raises:
            ValueError: If neither ``text`` nor ``tokens`` is provided.
        """
        output_kwargs = self._merge_kwargs(
            MolmoProcessorKwargs,
            tokenizer_init_kwargs=self.tokenizer.init_kwargs,
            **kwargs,
        )
        text_kwargs = output_kwargs["text_kwargs"]

        if tokens is None:
            if text is None:
                # Previously this surfaced as an opaque TypeError from string concatenation.
                raise ValueError("Either `text` or `tokens` must be provided.")
            tokens = self.get_tokens_input(
                text,
                text_kwargs["message_format"],
                text_kwargs["always_start_with_space"],
            )

        if images is not None:
            if not isinstance(images, (list, tuple)):
                images = [images]
            image_arrays = []
            for image in images:
                if isinstance(image, Image):
                    # Handle images with EXIF orientation tags, which PIL will ignore by default
                    # https://github.com/python-pillow/Pillow/issues/4703
                    # The transposed copy must be the one that is converted and stored;
                    # transposing first keeps the EXIF data available to exif_transpose.
                    image = ImageOps.exif_transpose(image).convert("RGB")
                    image_arrays.append(np.array(image))
                else:
                    assert len(image.shape) == 3 and image.shape[-1] == 3
                    image_arrays.append(image.astype(np.uint8))
            images = image_arrays
            # For now only support inserting images at the start
            image_idx = [-1] * len(images)
        else:
            image_idx = None

        sequence_length = text_kwargs["sequence_length"]

        special_ids = self.special_token_ids
        out = self.image_processor.multimodal_preprocess(
            images=images,
            image_idx=image_idx,
            tokens=np.asarray(tokens).astype(np.int32),
            sequence_length=sequence_length,
            image_patch_token_id=special_ids[DEFAULT_IMAGE_PATCH_TOKEN],
            image_col_token_id=special_ids[DEFAULT_IM_COL_TOKEN],
            image_start_token_id=special_ids[DEFAULT_IM_START_TOKEN],
            image_end_token_id=special_ids[DEFAULT_IM_END_TOKEN],
            **output_kwargs["images_kwargs"]
        )

        # Prepend BOS
        # qwen2 and olmo do not have a BOS, and instead use EOS as a generic separator token.
        # Compare against None explicitly so that a legitimate BOS id of 0 is not discarded.
        bos = self.tokenizer.bos_token_id
        if bos is None:
            bos = self.tokenizer.eos_token_id
        decoder_input_tokens = np.pad(out["input_ids"], [[1, 0]], constant_values=bos)
        out["input_ids"] = decoder_input_tokens
        if "image_input_idx" in out:
            # Shift patch mapping up by one since we added BOS; negative entries are left as-is.
            image_input_idx = out["image_input_idx"]
            out["image_input_idx"] = np.where(image_input_idx < 0, image_input_idx, image_input_idx + 1)

        for k, v in out.items():
            out[k] = torch.from_numpy(v)

        return out
190
+
191
+
192
+ MolmoProcessor.register_for_auto_class()
preprocessor_config.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoImageProcessor": "image_preprocessing_molmo.MolmoImageProcessor",
4
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
5
+ },
6
+ "base_image_input_size": [
7
+ 336,
8
+ 336
9
+ ],
10
+ "do_normalize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_padding_mask": true,
17
+ "image_patch_size": 14,
18
+ "image_processor_type": "MolmoImageProcessor",
19
+ "image_std": [
20
+ 0.26862954,
21
+ 0.26130258,
22
+ 0.27577711
23
+ ],
24
+ "image_token_length_h": 12,
25
+ "image_token_length_w": 12,
26
+ "max_crops": 12,
27
+ "overlap_margins": [
28
+ 4,
29
+ 4
30
+ ],
31
+ "processor_class": "MolmoProcessor"
32
+ }
processor_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "auto_map": {
3
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
4
+ },
5
+ "processor_class": "MolmoProcessor"
6
+ }
recipe.yaml ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ DEFAULT_stage:
2
+ DEFAULT_modifiers:
3
+ QuantizationModifier:
4
+ ignore: ['re:model.vision_backbone*']
5
+ targets: Linear
6
+ scheme: FP8_DYNAMIC
special_tokens_map.json ADDED
@@ -0,0 +1,441 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "|<EXTRA_TOKENS_0>|",
4
+ "|<EXTRA_TOKENS_1>|",
5
+ "|<EXTRA_TOKENS_2>|",
6
+ "|<EXTRA_TOKENS_3>|",
7
+ "|<EXTRA_TOKENS_4>|",
8
+ "|<EXTRA_TOKENS_5>|",
9
+ "|<EXTRA_TOKENS_6>|",
10
+ "|<EXTRA_TOKENS_7>|",
11
+ "|<EXTRA_TOKENS_8>|",
12
+ "|<EXTRA_TOKENS_9>|",
13
+ "|<EXTRA_TOKENS_10>|",
14
+ "|<EXTRA_TOKENS_11>|",
15
+ "|<EXTRA_TOKENS_12>|",
16
+ "|<EXTRA_TOKENS_13>|",
17
+ "|<EXTRA_TOKENS_14>|",
18
+ "|<EXTRA_TOKENS_15>|",
19
+ "|<EXTRA_TOKENS_16>|",
20
+ "|<EXTRA_TOKENS_17>|",
21
+ "|<EXTRA_TOKENS_18>|",
22
+ "|<EXTRA_TOKENS_19>|",
23
+ "|<EXTRA_TOKENS_20>|",
24
+ "|<EXTRA_TOKENS_21>|",
25
+ "|<EXTRA_TOKENS_22>|",
26
+ "|<EXTRA_TOKENS_23>|",
27
+ "|<EXTRA_TOKENS_24>|",
28
+ "|<EXTRA_TOKENS_25>|",
29
+ "|<EXTRA_TOKENS_26>|",
30
+ "|<EXTRA_TOKENS_27>|",
31
+ "|<EXTRA_TOKENS_28>|",
32
+ "|<EXTRA_TOKENS_29>|",
33
+ "|<EXTRA_TOKENS_30>|",
34
+ "|<EXTRA_TOKENS_31>|",
35
+ "|<EXTRA_TOKENS_32>|",
36
+ "|<EXTRA_TOKENS_33>|",
37
+ "|<EXTRA_TOKENS_34>|",
38
+ "|<EXTRA_TOKENS_35>|",
39
+ "|<EXTRA_TOKENS_36>|",
40
+ "|<EXTRA_TOKENS_37>|",
41
+ "|<EXTRA_TOKENS_38>|",
42
+ "|<EXTRA_TOKENS_39>|",
43
+ "|<EXTRA_TOKENS_40>|",
44
+ "|<EXTRA_TOKENS_41>|",
45
+ "|<EXTRA_TOKENS_42>|",
46
+ "|<EXTRA_TOKENS_43>|",
47
+ "|<EXTRA_TOKENS_44>|",
48
+ "|<EXTRA_TOKENS_45>|",
49
+ "|<EXTRA_TOKENS_46>|",
50
+ "|<EXTRA_TOKENS_47>|",
51
+ "|<EXTRA_TOKENS_48>|",
52
+ "|<EXTRA_TOKENS_49>|",
53
+ "|<EXTRA_TOKENS_50>|",
54
+ "|<EXTRA_TOKENS_51>|",
55
+ "|<EXTRA_TOKENS_52>|",
56
+ "|<EXTRA_TOKENS_53>|",
57
+ "|<EXTRA_TOKENS_54>|",
58
+ "|<EXTRA_TOKENS_55>|",
59
+ "|<EXTRA_TOKENS_56>|",
60
+ "|<EXTRA_TOKENS_57>|",
61
+ "|<EXTRA_TOKENS_58>|",
62
+ "|<EXTRA_TOKENS_59>|",
63
+ "|<EXTRA_TOKENS_60>|",
64
+ "|<EXTRA_TOKENS_61>|",
65
+ "|<EXTRA_TOKENS_62>|",
66
+ "|<EXTRA_TOKENS_63>|",
67
+ "|<EXTRA_TOKENS_64>|",
68
+ "|<EXTRA_TOKENS_65>|",
69
+ "|<EXTRA_TOKENS_66>|",
70
+ "|<EXTRA_TOKENS_67>|",
71
+ "|<EXTRA_TOKENS_68>|",
72
+ "|<EXTRA_TOKENS_69>|",
73
+ "|<EXTRA_TOKENS_70>|",
74
+ "|<EXTRA_TOKENS_71>|",
75
+ "|<EXTRA_TOKENS_72>|",
76
+ "|<EXTRA_TOKENS_73>|",
77
+ "|<EXTRA_TOKENS_74>|",
78
+ "|<EXTRA_TOKENS_75>|",
79
+ "|<EXTRA_TOKENS_76>|",
80
+ "|<EXTRA_TOKENS_77>|",
81
+ "|<EXTRA_TOKENS_78>|",
82
+ "|<EXTRA_TOKENS_79>|",
83
+ "|<EXTRA_TOKENS_80>|",
84
+ "|<EXTRA_TOKENS_81>|",
85
+ "|<EXTRA_TOKENS_82>|",
86
+ "|<EXTRA_TOKENS_83>|",
87
+ "|<EXTRA_TOKENS_84>|",
88
+ "|<EXTRA_TOKENS_85>|",
89
+ "|<EXTRA_TOKENS_86>|",
90
+ "|<EXTRA_TOKENS_87>|",
91
+ "|<EXTRA_TOKENS_88>|",
92
+ "|<EXTRA_TOKENS_89>|",
93
+ "|<EXTRA_TOKENS_90>|",
94
+ "|<EXTRA_TOKENS_91>|",
95
+ "|<EXTRA_TOKENS_92>|",
96
+ "|<EXTRA_TOKENS_93>|",
97
+ "|<EXTRA_TOKENS_94>|",
98
+ "|<EXTRA_TOKENS_95>|",
99
+ "|<EXTRA_TOKENS_96>|",
100
+ "|<EXTRA_TOKENS_97>|",
101
+ "|<EXTRA_TOKENS_98>|",
102
+ "|<EXTRA_TOKENS_99>|",
103
+ "|<EXTRA_TOKENS_100>|",
104
+ "|<EXTRA_TOKENS_101>|",
105
+ "|<EXTRA_TOKENS_102>|",
106
+ "|<EXTRA_TOKENS_103>|",
107
+ "|<EXTRA_TOKENS_104>|",
108
+ "|<EXTRA_TOKENS_105>|",
109
+ "|<EXTRA_TOKENS_106>|",
110
+ "|<EXTRA_TOKENS_107>|",
111
+ "|<EXTRA_TOKENS_108>|",
112
+ "|<EXTRA_TOKENS_109>|",
113
+ "|<EXTRA_TOKENS_110>|",
114
+ "|<EXTRA_TOKENS_111>|",
115
+ "|<EXTRA_TOKENS_112>|",
116
+ "|<EXTRA_TOKENS_113>|",
117
+ "|<EXTRA_TOKENS_114>|",
118
+ "|<EXTRA_TOKENS_115>|",
119
+ "|<EXTRA_TOKENS_116>|",
120
+ "|<EXTRA_TOKENS_117>|",
121
+ "|<EXTRA_TOKENS_118>|",
122
+ "|<EXTRA_TOKENS_119>|",
123
+ "|<EXTRA_TOKENS_120>|",
124
+ "|<EXTRA_TOKENS_121>|",
125
+ "|<EXTRA_TOKENS_122>|",
126
+ "|<EXTRA_TOKENS_123>|",
127
+ "|<EXTRA_TOKENS_124>|",
128
+ "|<EXTRA_TOKENS_125>|",
129
+ "|<EXTRA_TOKENS_126>|",
130
+ "|<EXTRA_TOKENS_127>|",
131
+ "|<EXTRA_TOKENS_128>|",
132
+ "|<EXTRA_TOKENS_129>|",
133
+ "|<EXTRA_TOKENS_130>|",
134
+ "|<EXTRA_TOKENS_131>|",
135
+ "|<EXTRA_TOKENS_132>|",
136
+ "|<EXTRA_TOKENS_133>|",
137
+ "|<EXTRA_TOKENS_134>|",
138
+ "|<EXTRA_TOKENS_135>|",
139
+ "|<EXTRA_TOKENS_136>|",
140
+ "|<EXTRA_TOKENS_137>|",
141
+ "|<EXTRA_TOKENS_138>|",
142
+ "|<EXTRA_TOKENS_139>|",
143
+ "|<EXTRA_TOKENS_140>|",
144
+ "|<EXTRA_TOKENS_141>|",
145
+ "|<EXTRA_TOKENS_142>|",
146
+ "|<EXTRA_TOKENS_143>|",
147
+ "|<EXTRA_TOKENS_144>|",
148
+ "|<EXTRA_TOKENS_145>|",
149
+ "|<EXTRA_TOKENS_146>|",
150
+ "|<EXTRA_TOKENS_147>|",
151
+ "|<EXTRA_TOKENS_148>|",
152
+ "|<EXTRA_TOKENS_149>|",
153
+ "|<EXTRA_TOKENS_150>|",
154
+ "|<EXTRA_TOKENS_151>|",
155
+ "|<EXTRA_TOKENS_152>|",
156
+ "|<EXTRA_TOKENS_153>|",
157
+ "|<EXTRA_TOKENS_154>|",
158
+ "|<EXTRA_TOKENS_155>|",
159
+ "|<EXTRA_TOKENS_156>|",
160
+ "|<EXTRA_TOKENS_157>|",
161
+ "|<EXTRA_TOKENS_158>|",
162
+ "|<EXTRA_TOKENS_159>|",
163
+ "|<EXTRA_TOKENS_160>|",
164
+ "|<EXTRA_TOKENS_161>|",
165
+ "|<EXTRA_TOKENS_162>|",
166
+ "|<EXTRA_TOKENS_163>|",
167
+ "|<EXTRA_TOKENS_164>|",
168
+ "|<EXTRA_TOKENS_165>|",
169
+ "|<EXTRA_TOKENS_166>|",
170
+ "|<EXTRA_TOKENS_167>|",
171
+ "|<EXTRA_TOKENS_168>|",
172
+ "|<EXTRA_TOKENS_169>|",
173
+ "|<EXTRA_TOKENS_170>|",
174
+ "|<EXTRA_TOKENS_171>|",
175
+ "|<EXTRA_TOKENS_172>|",
176
+ "|<EXTRA_TOKENS_173>|",
177
+ "|<EXTRA_TOKENS_174>|",
178
+ "|<EXTRA_TOKENS_175>|",
179
+ "|<EXTRA_TOKENS_176>|",
180
+ "|<EXTRA_TOKENS_177>|",
181
+ "|<EXTRA_TOKENS_178>|",
182
+ "|<EXTRA_TOKENS_179>|",
183
+ "|<EXTRA_TOKENS_180>|",
184
+ "|<EXTRA_TOKENS_181>|",
185
+ "|<EXTRA_TOKENS_182>|",
186
+ "|<EXTRA_TOKENS_183>|",
187
+ "|<EXTRA_TOKENS_184>|",
188
+ "|<EXTRA_TOKENS_185>|",
189
+ "|<EXTRA_TOKENS_186>|",
190
+ "|<EXTRA_TOKENS_187>|",
191
+ "|<EXTRA_TOKENS_188>|",
192
+ "|<EXTRA_TOKENS_189>|",
193
+ "|<EXTRA_TOKENS_190>|",
194
+ "|<EXTRA_TOKENS_191>|",
195
+ "|<EXTRA_TOKENS_192>|",
196
+ "|<EXTRA_TOKENS_193>|",
197
+ "|<EXTRA_TOKENS_194>|",
198
+ "|<EXTRA_TOKENS_195>|",
199
+ "|<EXTRA_TOKENS_196>|",
200
+ "|<EXTRA_TOKENS_197>|",
201
+ "|<EXTRA_TOKENS_198>|",
202
+ "|<EXTRA_TOKENS_199>|",
203
+ "|<EXTRA_TOKENS_200>|",
204
+ "|<EXTRA_TOKENS_201>|",
205
+ "|<EXTRA_TOKENS_202>|",
206
+ "|<EXTRA_TOKENS_203>|",
207
+ "|<EXTRA_TOKENS_204>|",
208
+ "|<EXTRA_TOKENS_205>|",
209
+ "|<EXTRA_TOKENS_206>|",
210
+ "|<EXTRA_TOKENS_207>|",
211
+ "|<EXTRA_TOKENS_208>|",
212
+ "|<EXTRA_TOKENS_209>|",
213
+ "|<EXTRA_TOKENS_210>|",
214
+ "|<EXTRA_TOKENS_211>|",
215
+ "|<EXTRA_TOKENS_212>|",
216
+ "|<EXTRA_TOKENS_213>|",
217
+ "|<EXTRA_TOKENS_214>|",
218
+ "|<EXTRA_TOKENS_215>|",
219
+ "|<EXTRA_TOKENS_216>|",
220
+ "|<EXTRA_TOKENS_217>|",
221
+ "|<EXTRA_TOKENS_218>|",
222
+ "|<EXTRA_TOKENS_219>|",
223
+ "|<EXTRA_TOKENS_220>|",
224
+ "|<EXTRA_TOKENS_221>|",
225
+ "|<EXTRA_TOKENS_222>|",
226
+ "|<EXTRA_TOKENS_223>|",
227
+ "|<EXTRA_TOKENS_224>|",
228
+ "|<EXTRA_TOKENS_225>|",
229
+ "|<EXTRA_TOKENS_226>|",
230
+ "|<EXTRA_TOKENS_227>|",
231
+ "|<EXTRA_TOKENS_228>|",
232
+ "|<EXTRA_TOKENS_229>|",
233
+ "|<EXTRA_TOKENS_230>|",
234
+ "|<EXTRA_TOKENS_231>|",
235
+ "|<EXTRA_TOKENS_232>|",
236
+ "|<EXTRA_TOKENS_233>|",
237
+ "|<EXTRA_TOKENS_234>|",
238
+ "|<EXTRA_TOKENS_235>|",
239
+ "|<EXTRA_TOKENS_236>|",
240
+ "|<EXTRA_TOKENS_237>|",
241
+ "|<EXTRA_TOKENS_238>|",
242
+ "|<EXTRA_TOKENS_239>|",
243
+ "|<EXTRA_TOKENS_240>|",
244
+ "|<EXTRA_TOKENS_241>|",
245
+ "|<EXTRA_TOKENS_242>|",
246
+ "|<EXTRA_TOKENS_243>|",
247
+ "|<EXTRA_TOKENS_244>|",
248
+ "|<EXTRA_TOKENS_245>|",
249
+ "|<EXTRA_TOKENS_246>|",
250
+ "|<EXTRA_TOKENS_247>|",
251
+ "|<EXTRA_TOKENS_248>|",
252
+ "|<EXTRA_TOKENS_249>|",
253
+ "|<EXTRA_TOKENS_250>|",
254
+ "|<EXTRA_TOKENS_251>|",
255
+ "|<EXTRA_TOKENS_252>|",
256
+ "|<EXTRA_TOKENS_253>|",
257
+ "|<EXTRA_TOKENS_254>|",
258
+ "|<EXTRA_TOKENS_255>|",
259
+ "|<EXTRA_TOKENS_256>|",
260
+ "|<EXTRA_TOKENS_257>|",
261
+ "|<EXTRA_TOKENS_258>|",
262
+ "|<EXTRA_TOKENS_259>|",
263
+ "|<EXTRA_TOKENS_260>|",
264
+ "|<EXTRA_TOKENS_261>|",
265
+ "|<EXTRA_TOKENS_262>|",
266
+ "|<EXTRA_TOKENS_263>|",
267
+ "|<EXTRA_TOKENS_264>|",
268
+ "|<EXTRA_TOKENS_265>|",
269
+ "|<EXTRA_TOKENS_266>|",
270
+ "|<EXTRA_TOKENS_267>|",
271
+ "|<EXTRA_TOKENS_268>|",
272
+ "|<EXTRA_TOKENS_269>|",
273
+ "|<EXTRA_TOKENS_270>|",
274
+ "|<EXTRA_TOKENS_271>|",
275
+ "|<EXTRA_TOKENS_272>|",
276
+ "|<EXTRA_TOKENS_273>|",
277
+ "|<EXTRA_TOKENS_274>|",
278
+ "|<EXTRA_TOKENS_275>|",
279
+ "|<EXTRA_TOKENS_276>|",
280
+ "|<EXTRA_TOKENS_277>|",
281
+ "|<EXTRA_TOKENS_278>|",
282
+ "|<EXTRA_TOKENS_279>|",
283
+ "|<EXTRA_TOKENS_280>|",
284
+ "|<EXTRA_TOKENS_281>|",
285
+ "|<EXTRA_TOKENS_282>|",
286
+ "|<EXTRA_TOKENS_283>|",
287
+ "|<EXTRA_TOKENS_284>|",
288
+ "|<EXTRA_TOKENS_285>|",
289
+ "|<EXTRA_TOKENS_286>|",
290
+ "|<EXTRA_TOKENS_287>|",
291
+ "|<EXTRA_TOKENS_288>|",
292
+ "|<EXTRA_TOKENS_289>|",
293
+ "|<EXTRA_TOKENS_290>|",
294
+ "|<EXTRA_TOKENS_291>|",
295
+ "|<EXTRA_TOKENS_292>|",
296
+ "|<EXTRA_TOKENS_293>|",
297
+ "|<EXTRA_TOKENS_294>|",
298
+ "|<EXTRA_TOKENS_295>|",
299
+ "|<EXTRA_TOKENS_296>|",
300
+ "|<EXTRA_TOKENS_297>|",
301
+ "|<EXTRA_TOKENS_298>|",
302
+ "|<EXTRA_TOKENS_299>|",
303
+ "|<EXTRA_TOKENS_300>|",
304
+ "|<EXTRA_TOKENS_301>|",
305
+ "|<EXTRA_TOKENS_302>|",
306
+ "|<EXTRA_TOKENS_303>|",
307
+ "|<EXTRA_TOKENS_304>|",
308
+ "|<EXTRA_TOKENS_305>|",
309
+ "|<EXTRA_TOKENS_306>|",
310
+ "|<EXTRA_TOKENS_307>|",
311
+ "|<EXTRA_TOKENS_308>|",
312
+ "|<EXTRA_TOKENS_309>|",
313
+ "|<EXTRA_TOKENS_310>|",
314
+ "|<EXTRA_TOKENS_311>|",
315
+ "|<EXTRA_TOKENS_312>|",
316
+ "|<EXTRA_TOKENS_313>|",
317
+ "|<EXTRA_TOKENS_314>|",
318
+ "|<EXTRA_TOKENS_315>|",
319
+ "|<EXTRA_TOKENS_316>|",
320
+ "|<EXTRA_TOKENS_317>|",
321
+ "|<EXTRA_TOKENS_318>|",
322
+ "|<EXTRA_TOKENS_319>|",
323
+ "|<EXTRA_TOKENS_320>|",
324
+ "|<EXTRA_TOKENS_321>|",
325
+ "|<EXTRA_TOKENS_322>|",
326
+ "|<EXTRA_TOKENS_323>|",
327
+ "|<EXTRA_TOKENS_324>|",
328
+ "|<EXTRA_TOKENS_325>|",
329
+ "|<EXTRA_TOKENS_326>|",
330
+ "|<EXTRA_TOKENS_327>|",
331
+ "|<EXTRA_TOKENS_328>|",
332
+ "|<EXTRA_TOKENS_329>|",
333
+ "|<EXTRA_TOKENS_330>|",
334
+ "|<EXTRA_TOKENS_331>|",
335
+ "|<EXTRA_TOKENS_332>|",
336
+ "|<EXTRA_TOKENS_333>|",
337
+ "|<EXTRA_TOKENS_334>|",
338
+ "|<EXTRA_TOKENS_335>|",
339
+ "|<EXTRA_TOKENS_336>|",
340
+ "|<EXTRA_TOKENS_337>|",
341
+ "|<EXTRA_TOKENS_338>|",
342
+ "|<EXTRA_TOKENS_339>|",
343
+ "|<EXTRA_TOKENS_340>|",
344
+ "|<EXTRA_TOKENS_341>|",
345
+ "|<EXTRA_TOKENS_342>|",
346
+ "|<EXTRA_TOKENS_343>|",
347
+ "|<EXTRA_TOKENS_344>|",
348
+ "|<EXTRA_TOKENS_345>|",
349
+ "|<EXTRA_TOKENS_346>|",
350
+ "|<EXTRA_TOKENS_347>|",
351
+ "|<EXTRA_TOKENS_348>|",
352
+ "|<EXTRA_TOKENS_349>|",
353
+ "|<EXTRA_TOKENS_350>|",
354
+ "|<EXTRA_TOKENS_351>|",
355
+ "|<EXTRA_TOKENS_352>|",
356
+ "|<EXTRA_TOKENS_353>|",
357
+ "|<EXTRA_TOKENS_354>|",
358
+ "|<EXTRA_TOKENS_355>|",
359
+ "|<EXTRA_TOKENS_356>|",
360
+ "|<EXTRA_TOKENS_357>|",
361
+ "|<EXTRA_TOKENS_358>|",
362
+ "|<EXTRA_TOKENS_359>|",
363
+ "|<EXTRA_TOKENS_360>|",
364
+ "|<EXTRA_TOKENS_361>|",
365
+ "|<EXTRA_TOKENS_362>|",
366
+ "|<EXTRA_TOKENS_363>|",
367
+ "|<EXTRA_TOKENS_364>|",
368
+ "|<EXTRA_TOKENS_365>|",
369
+ "|<EXTRA_TOKENS_366>|",
370
+ "|<EXTRA_TOKENS_367>|",
371
+ "|<EXTRA_TOKENS_368>|",
372
+ "|<EXTRA_TOKENS_369>|",
373
+ "|<EXTRA_TOKENS_370>|",
374
+ "|<EXTRA_TOKENS_371>|",
375
+ "|<EXTRA_TOKENS_372>|",
376
+ "|<EXTRA_TOKENS_373>|",
377
+ "|<EXTRA_TOKENS_374>|",
378
+ "|<EXTRA_TOKENS_375>|",
379
+ "|<EXTRA_TOKENS_376>|",
380
+ "|<EXTRA_TOKENS_377>|",
381
+ "|<EXTRA_TOKENS_378>|",
382
+ "|<EXTRA_TOKENS_379>|",
383
+ "|<EXTRA_TOKENS_380>|",
384
+ "|<EXTRA_TOKENS_381>|",
385
+ "|<EXTRA_TOKENS_382>|",
386
+ "|<EXTRA_TOKENS_383>|",
387
+ "|<EXTRA_TOKENS_384>|",
388
+ "|<EXTRA_TOKENS_385>|",
389
+ "|<EXTRA_TOKENS_386>|",
390
+ "|<EXTRA_TOKENS_387>|",
391
+ "|<EXTRA_TOKENS_388>|",
392
+ "|<EXTRA_TOKENS_389>|",
393
+ "|<EXTRA_TOKENS_390>|",
394
+ "|<EXTRA_TOKENS_391>|",
395
+ "|<EXTRA_TOKENS_392>|",
396
+ "|<EXTRA_TOKENS_393>|",
397
+ "|<EXTRA_TOKENS_394>|",
398
+ "|<EXTRA_TOKENS_395>|",
399
+ "|<EXTRA_TOKENS_396>|",
400
+ "|<EXTRA_TOKENS_397>|",
401
+ "|<EXTRA_TOKENS_398>|",
402
+ "|<EXTRA_TOKENS_399>|",
403
+ "|<EXTRA_TOKENS_400>|",
404
+ "|<EXTRA_TOKENS_401>|",
405
+ "|<EXTRA_TOKENS_402>|",
406
+ "|<EXTRA_TOKENS_403>|",
407
+ "|<EXTRA_TOKENS_404>|",
408
+ "|<EXTRA_TOKENS_405>|",
409
+ "|<EXTRA_TOKENS_406>|",
410
+ "|<EXTRA_TOKENS_407>|",
411
+ "|<EXTRA_TOKENS_408>|",
412
+ "|<EXTRA_TOKENS_409>|",
413
+ "|<EXTRA_TOKENS_410>|",
414
+ "|<EXTRA_TOKENS_411>|",
415
+ "|<EXTRA_TOKENS_412>|",
416
+ "|<EXTRA_TOKENS_413>|",
417
+ "|<EXTRA_TOKENS_414>|",
418
+ "|<EXTRA_TOKENS_415>|",
419
+ "|<EXTRA_TOKENS_416>|",
420
+ "|<EXTRA_TOKENS_417>|",
421
+ "<im_start>",
422
+ "<im_end>",
423
+ "<im_patch>",
424
+ "<im_col>",
425
+ "<|image|>"
426
+ ],
427
+ "eos_token": {
428
+ "content": "<|endoftext|>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false
433
+ },
434
+ "pad_token": {
435
+ "content": "<|endoftext|>",
436
+ "lstrip": false,
437
+ "normalized": false,
438
+ "rstrip": false,
439
+ "single_word": false
440
+ }
441
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6248048a83152ce87663c799492fe7e60c8086f3ae51ce7bd255ccc445746fc0
3
+ size 11501432
tokenizer_config.json ADDED
@@ -0,0 +1,3852 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "151643": {
5
+ "content": "<|endoftext|>",
6
+ "lstrip": false,
7
+ "normalized": false,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "151644": {
13
+ "content": "<|im_start|>",
14
+ "lstrip": false,
15
+ "normalized": false,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "151645": {
21
+ "content": "<|im_end|>",
22
+ "lstrip": false,
23
+ "normalized": false,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "151646": {
29
+ "content": "|<EXTRA_TOKENS_0>|",
30
+ "lstrip": false,
31
+ "normalized": false,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "151647": {
37
+ "content": "|<EXTRA_TOKENS_1>|",
38
+ "lstrip": false,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ },
44
+ "151648": {
45
+ "content": "|<EXTRA_TOKENS_2>|",
46
+ "lstrip": false,
47
+ "normalized": false,
48
+ "rstrip": false,
49
+ "single_word": false,
50
+ "special": true
51
+ },
52
+ "151649": {
53
+ "content": "|<EXTRA_TOKENS_3>|",
54
+ "lstrip": false,
55
+ "normalized": false,
56
+ "rstrip": false,
57
+ "single_word": false,
58
+ "special": true
59
+ },
60
+ "151650": {
61
+ "content": "|<EXTRA_TOKENS_4>|",
62
+ "lstrip": false,
63
+ "normalized": false,
64
+ "rstrip": false,
65
+ "single_word": false,
66
+ "special": true
67
+ },
68
+ "151651": {
69
+ "content": "|<EXTRA_TOKENS_5>|",
70
+ "lstrip": false,
71
+ "normalized": false,
72
+ "rstrip": false,
73
+ "single_word": false,
74
+ "special": true
75
+ },
76
+ "151652": {
77
+ "content": "|<EXTRA_TOKENS_6>|",
78
+ "lstrip": false,
79
+ "normalized": false,
80
+ "rstrip": false,
81
+ "single_word": false,
82
+ "special": true
83
+ },
84
+ "151653": {
85
+ "content": "|<EXTRA_TOKENS_7>|",
86
+ "lstrip": false,
87
+ "normalized": false,
88
+ "rstrip": false,
89
+ "single_word": false,
90
+ "special": true
91
+ },
92
+ "151654": {
93
+ "content": "|<EXTRA_TOKENS_8>|",
94
+ "lstrip": false,
95
+ "normalized": false,
96
+ "rstrip": false,
97
+ "single_word": false,
98
+ "special": true
99
+ },
100
+ "151655": {
101
+ "content": "|<EXTRA_TOKENS_9>|",
102
+ "lstrip": false,
103
+ "normalized": false,
104
+ "rstrip": false,
105
+ "single_word": false,
106
+ "special": true
107
+ },
108
+ "151656": {
109
+ "content": "|<EXTRA_TOKENS_10>|",
110
+ "lstrip": false,
111
+ "normalized": false,
112
+ "rstrip": false,
113
+ "single_word": false,
114
+ "special": true
115
+ },
116
+ "151657": {
117
+ "content": "|<EXTRA_TOKENS_11>|",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false,
122
+ "special": true
123
+ },
124
+ "151658": {
125
+ "content": "|<EXTRA_TOKENS_12>|",
126
+ "lstrip": false,
127
+ "normalized": false,
128
+ "rstrip": false,
129
+ "single_word": false,
130
+ "special": true
131
+ },
132
+ "151659": {
133
+ "content": "|<EXTRA_TOKENS_13>|",
134
+ "lstrip": false,
135
+ "normalized": false,
136
+ "rstrip": false,
137
+ "single_word": false,
138
+ "special": true
139
+ },
140
+ "151660": {
141
+ "content": "|<EXTRA_TOKENS_14>|",
142
+ "lstrip": false,
143
+ "normalized": false,
144
+ "rstrip": false,
145
+ "single_word": false,
146
+ "special": true
147
+ },
148
+ "151661": {
149
+ "content": "|<EXTRA_TOKENS_15>|",
150
+ "lstrip": false,
151
+ "normalized": false,
152
+ "rstrip": false,
153
+ "single_word": false,
154
+ "special": true
155
+ },
156
+ "151662": {
157
+ "content": "|<EXTRA_TOKENS_16>|",
158
+ "lstrip": false,
159
+ "normalized": false,
160
+ "rstrip": false,
161
+ "single_word": false,
162
+ "special": true
163
+ },
164
+ "151663": {
165
+ "content": "|<EXTRA_TOKENS_17>|",
166
+ "lstrip": false,
167
+ "normalized": false,
168
+ "rstrip": false,
169
+ "single_word": false,
170
+ "special": true
171
+ },
172
+ "151664": {
173
+ "content": "|<EXTRA_TOKENS_18>|",
174
+ "lstrip": false,
175
+ "normalized": false,
176
+ "rstrip": false,
177
+ "single_word": false,
178
+ "special": true
179
+ },
180
+ "151665": {
181
+ "content": "|<EXTRA_TOKENS_19>|",
182
+ "lstrip": false,
183
+ "normalized": false,
184
+ "rstrip": false,
185
+ "single_word": false,
186
+ "special": true
187
+ },
188
+ "151666": {
189
+ "content": "|<EXTRA_TOKENS_20>|",
190
+ "lstrip": false,
191
+ "normalized": false,
192
+ "rstrip": false,
193
+ "single_word": false,
194
+ "special": true
195
+ },
196
+ "151667": {
197
+ "content": "|<EXTRA_TOKENS_21>|",
198
+ "lstrip": false,
199
+ "normalized": false,
200
+ "rstrip": false,
201
+ "single_word": false,
202
+ "special": true
203
+ },
204
+ "151668": {
205
+ "content": "|<EXTRA_TOKENS_22>|",
206
+ "lstrip": false,
207
+ "normalized": false,
208
+ "rstrip": false,
209
+ "single_word": false,
210
+ "special": true
211
+ },
212
+ "151669": {
213
+ "content": "|<EXTRA_TOKENS_23>|",
214
+ "lstrip": false,
215
+ "normalized": false,
216
+ "rstrip": false,
217
+ "single_word": false,
218
+ "special": true
219
+ },
220
+ "151670": {
221
+ "content": "|<EXTRA_TOKENS_24>|",
222
+ "lstrip": false,
223
+ "normalized": false,
224
+ "rstrip": false,
225
+ "single_word": false,
226
+ "special": true
227
+ },
228
+ "151671": {
229
+ "content": "|<EXTRA_TOKENS_25>|",
230
+ "lstrip": false,
231
+ "normalized": false,
232
+ "rstrip": false,
233
+ "single_word": false,
234
+ "special": true
235
+ },
236
+ "151672": {
237
+ "content": "|<EXTRA_TOKENS_26>|",
238
+ "lstrip": false,
239
+ "normalized": false,
240
+ "rstrip": false,
241
+ "single_word": false,
242
+ "special": true
243
+ },
244
+ "151673": {
245
+ "content": "|<EXTRA_TOKENS_27>|",
246
+ "lstrip": false,
247
+ "normalized": false,
248
+ "rstrip": false,
249
+ "single_word": false,
250
+ "special": true
251
+ },
252
+ "151674": {
253
+ "content": "|<EXTRA_TOKENS_28>|",
254
+ "lstrip": false,
255
+ "normalized": false,
256
+ "rstrip": false,
257
+ "single_word": false,
258
+ "special": true
259
+ },
260
+ "151675": {
261
+ "content": "|<EXTRA_TOKENS_29>|",
262
+ "lstrip": false,
263
+ "normalized": false,
264
+ "rstrip": false,
265
+ "single_word": false,
266
+ "special": true
267
+ },
268
+ "151676": {
269
+ "content": "|<EXTRA_TOKENS_30>|",
270
+ "lstrip": false,
271
+ "normalized": false,
272
+ "rstrip": false,
273
+ "single_word": false,
274
+ "special": true
275
+ },
276
+ "151677": {
277
+ "content": "|<EXTRA_TOKENS_31>|",
278
+ "lstrip": false,
279
+ "normalized": false,
280
+ "rstrip": false,
281
+ "single_word": false,
282
+ "special": true
283
+ },
284
+ "151678": {
285
+ "content": "|<EXTRA_TOKENS_32>|",
286
+ "lstrip": false,
287
+ "normalized": false,
288
+ "rstrip": false,
289
+ "single_word": false,
290
+ "special": true
291
+ },
292
+ "151679": {
293
+ "content": "|<EXTRA_TOKENS_33>|",
294
+ "lstrip": false,
295
+ "normalized": false,
296
+ "rstrip": false,
297
+ "single_word": false,
298
+ "special": true
299
+ },
300
+ "151680": {
301
+ "content": "|<EXTRA_TOKENS_34>|",
302
+ "lstrip": false,
303
+ "normalized": false,
304
+ "rstrip": false,
305
+ "single_word": false,
306
+ "special": true
307
+ },
308
+ "151681": {
309
+ "content": "|<EXTRA_TOKENS_35>|",
310
+ "lstrip": false,
311
+ "normalized": false,
312
+ "rstrip": false,
313
+ "single_word": false,
314
+ "special": true
315
+ },
316
+ "151682": {
317
+ "content": "|<EXTRA_TOKENS_36>|",
318
+ "lstrip": false,
319
+ "normalized": false,
320
+ "rstrip": false,
321
+ "single_word": false,
322
+ "special": true
323
+ },
324
+ "151683": {
325
+ "content": "|<EXTRA_TOKENS_37>|",
326
+ "lstrip": false,
327
+ "normalized": false,
328
+ "rstrip": false,
329
+ "single_word": false,
330
+ "special": true
331
+ },
332
+ "151684": {
333
+ "content": "|<EXTRA_TOKENS_38>|",
334
+ "lstrip": false,
335
+ "normalized": false,
336
+ "rstrip": false,
337
+ "single_word": false,
338
+ "special": true
339
+ },
340
+ "151685": {
341
+ "content": "|<EXTRA_TOKENS_39>|",
342
+ "lstrip": false,
343
+ "normalized": false,
344
+ "rstrip": false,
345
+ "single_word": false,
346
+ "special": true
347
+ },
348
+ "151686": {
349
+ "content": "|<EXTRA_TOKENS_40>|",
350
+ "lstrip": false,
351
+ "normalized": false,
352
+ "rstrip": false,
353
+ "single_word": false,
354
+ "special": true
355
+ },
356
+ "151687": {
357
+ "content": "|<EXTRA_TOKENS_41>|",
358
+ "lstrip": false,
359
+ "normalized": false,
360
+ "rstrip": false,
361
+ "single_word": false,
362
+ "special": true
363
+ },
364
+ "151688": {
365
+ "content": "|<EXTRA_TOKENS_42>|",
366
+ "lstrip": false,
367
+ "normalized": false,
368
+ "rstrip": false,
369
+ "single_word": false,
370
+ "special": true
371
+ },
372
+ "151689": {
373
+ "content": "|<EXTRA_TOKENS_43>|",
374
+ "lstrip": false,
375
+ "normalized": false,
376
+ "rstrip": false,
377
+ "single_word": false,
378
+ "special": true
379
+ },
380
+ "151690": {
381
+ "content": "|<EXTRA_TOKENS_44>|",
382
+ "lstrip": false,
383
+ "normalized": false,
384
+ "rstrip": false,
385
+ "single_word": false,
386
+ "special": true
387
+ },
388
+ "151691": {
389
+ "content": "|<EXTRA_TOKENS_45>|",
390
+ "lstrip": false,
391
+ "normalized": false,
392
+ "rstrip": false,
393
+ "single_word": false,
394
+ "special": true
395
+ },
396
+ "151692": {
397
+ "content": "|<EXTRA_TOKENS_46>|",
398
+ "lstrip": false,
399
+ "normalized": false,
400
+ "rstrip": false,
401
+ "single_word": false,
402
+ "special": true
403
+ },
404
+ "151693": {
405
+ "content": "|<EXTRA_TOKENS_47>|",
406
+ "lstrip": false,
407
+ "normalized": false,
408
+ "rstrip": false,
409
+ "single_word": false,
410
+ "special": true
411
+ },
412
+ "151694": {
413
+ "content": "|<EXTRA_TOKENS_48>|",
414
+ "lstrip": false,
415
+ "normalized": false,
416
+ "rstrip": false,
417
+ "single_word": false,
418
+ "special": true
419
+ },
420
+ "151695": {
421
+ "content": "|<EXTRA_TOKENS_49>|",
422
+ "lstrip": false,
423
+ "normalized": false,
424
+ "rstrip": false,
425
+ "single_word": false,
426
+ "special": true
427
+ },
428
+ "151696": {
429
+ "content": "|<EXTRA_TOKENS_50>|",
430
+ "lstrip": false,
431
+ "normalized": false,
432
+ "rstrip": false,
433
+ "single_word": false,
434
+ "special": true
435
+ },
436
+ "151697": {
437
+ "content": "|<EXTRA_TOKENS_51>|",
438
+ "lstrip": false,
439
+ "normalized": false,
440
+ "rstrip": false,
441
+ "single_word": false,
442
+ "special": true
443
+ },
444
+ "151698": {
445
+ "content": "|<EXTRA_TOKENS_52>|",
446
+ "lstrip": false,
447
+ "normalized": false,
448
+ "rstrip": false,
449
+ "single_word": false,
450
+ "special": true
451
+ },
452
+ "151699": {
453
+ "content": "|<EXTRA_TOKENS_53>|",
454
+ "lstrip": false,
455
+ "normalized": false,
456
+ "rstrip": false,
457
+ "single_word": false,
458
+ "special": true
459
+ },
460
+ "151700": {
461
+ "content": "|<EXTRA_TOKENS_54>|",
462
+ "lstrip": false,
463
+ "normalized": false,
464
+ "rstrip": false,
465
+ "single_word": false,
466
+ "special": true
467
+ },
468
+ "151701": {
469
+ "content": "|<EXTRA_TOKENS_55>|",
470
+ "lstrip": false,
471
+ "normalized": false,
472
+ "rstrip": false,
473
+ "single_word": false,
474
+ "special": true
475
+ },
476
+ "151702": {
477
+ "content": "|<EXTRA_TOKENS_56>|",
478
+ "lstrip": false,
479
+ "normalized": false,
480
+ "rstrip": false,
481
+ "single_word": false,
482
+ "special": true
483
+ },
484
+ "151703": {
485
+ "content": "|<EXTRA_TOKENS_57>|",
486
+ "lstrip": false,
487
+ "normalized": false,
488
+ "rstrip": false,
489
+ "single_word": false,
490
+ "special": true
491
+ },
492
+ "151704": {
493
+ "content": "|<EXTRA_TOKENS_58>|",
494
+ "lstrip": false,
495
+ "normalized": false,
496
+ "rstrip": false,
497
+ "single_word": false,
498
+ "special": true
499
+ },
500
+ "151705": {
501
+ "content": "|<EXTRA_TOKENS_59>|",
502
+ "lstrip": false,
503
+ "normalized": false,
504
+ "rstrip": false,
505
+ "single_word": false,
506
+ "special": true
507
+ },
508
+ "151706": {
509
+ "content": "|<EXTRA_TOKENS_60>|",
510
+ "lstrip": false,
511
+ "normalized": false,
512
+ "rstrip": false,
513
+ "single_word": false,
514
+ "special": true
515
+ },
516
+ "151707": {
517
+ "content": "|<EXTRA_TOKENS_61>|",
518
+ "lstrip": false,
519
+ "normalized": false,
520
+ "rstrip": false,
521
+ "single_word": false,
522
+ "special": true
523
+ },
524
+ "151708": {
525
+ "content": "|<EXTRA_TOKENS_62>|",
526
+ "lstrip": false,
527
+ "normalized": false,
528
+ "rstrip": false,
529
+ "single_word": false,
530
+ "special": true
531
+ },
532
+ "151709": {
533
+ "content": "|<EXTRA_TOKENS_63>|",
534
+ "lstrip": false,
535
+ "normalized": false,
536
+ "rstrip": false,
537
+ "single_word": false,
538
+ "special": true
539
+ },
540
+ "151710": {
541
+ "content": "|<EXTRA_TOKENS_64>|",
542
+ "lstrip": false,
543
+ "normalized": false,
544
+ "rstrip": false,
545
+ "single_word": false,
546
+ "special": true
547
+ },
548
+ "151711": {
549
+ "content": "|<EXTRA_TOKENS_65>|",
550
+ "lstrip": false,
551
+ "normalized": false,
552
+ "rstrip": false,
553
+ "single_word": false,
554
+ "special": true
555
+ },
556
+ "151712": {
557
+ "content": "|<EXTRA_TOKENS_66>|",
558
+ "lstrip": false,
559
+ "normalized": false,
560
+ "rstrip": false,
561
+ "single_word": false,
562
+ "special": true
563
+ },
564
+ "151713": {
565
+ "content": "|<EXTRA_TOKENS_67>|",
566
+ "lstrip": false,
567
+ "normalized": false,
568
+ "rstrip": false,
569
+ "single_word": false,
570
+ "special": true
571
+ },
572
+ "151714": {
573
+ "content": "|<EXTRA_TOKENS_68>|",
574
+ "lstrip": false,
575
+ "normalized": false,
576
+ "rstrip": false,
577
+ "single_word": false,
578
+ "special": true
579
+ },
580
+ "151715": {
581
+ "content": "|<EXTRA_TOKENS_69>|",
582
+ "lstrip": false,
583
+ "normalized": false,
584
+ "rstrip": false,
585
+ "single_word": false,
586
+ "special": true
587
+ },
588
+ "151716": {
589
+ "content": "|<EXTRA_TOKENS_70>|",
590
+ "lstrip": false,
591
+ "normalized": false,
592
+ "rstrip": false,
593
+ "single_word": false,
594
+ "special": true
595
+ },
596
+ "151717": {
597
+ "content": "|<EXTRA_TOKENS_71>|",
598
+ "lstrip": false,
599
+ "normalized": false,
600
+ "rstrip": false,
601
+ "single_word": false,
602
+ "special": true
603
+ },
604
+ "151718": {
605
+ "content": "|<EXTRA_TOKENS_72>|",
606
+ "lstrip": false,
607
+ "normalized": false,
608
+ "rstrip": false,
609
+ "single_word": false,
610
+ "special": true
611
+ },
612
+ "151719": {
613
+ "content": "|<EXTRA_TOKENS_73>|",
614
+ "lstrip": false,
615
+ "normalized": false,
616
+ "rstrip": false,
617
+ "single_word": false,
618
+ "special": true
619
+ },
620
+ "151720": {
621
+ "content": "|<EXTRA_TOKENS_74>|",
622
+ "lstrip": false,
623
+ "normalized": false,
624
+ "rstrip": false,
625
+ "single_word": false,
626
+ "special": true
627
+ },
628
+ "151721": {
629
+ "content": "|<EXTRA_TOKENS_75>|",
630
+ "lstrip": false,
631
+ "normalized": false,
632
+ "rstrip": false,
633
+ "single_word": false,
634
+ "special": true
635
+ },
636
+ "151722": {
637
+ "content": "|<EXTRA_TOKENS_76>|",
638
+ "lstrip": false,
639
+ "normalized": false,
640
+ "rstrip": false,
641
+ "single_word": false,
642
+ "special": true
643
+ },
644
+ "151723": {
645
+ "content": "|<EXTRA_TOKENS_77>|",
646
+ "lstrip": false,
647
+ "normalized": false,
648
+ "rstrip": false,
649
+ "single_word": false,
650
+ "special": true
651
+ },
652
+ "151724": {
653
+ "content": "|<EXTRA_TOKENS_78>|",
654
+ "lstrip": false,
655
+ "normalized": false,
656
+ "rstrip": false,
657
+ "single_word": false,
658
+ "special": true
659
+ },
660
+ "151725": {
661
+ "content": "|<EXTRA_TOKENS_79>|",
662
+ "lstrip": false,
663
+ "normalized": false,
664
+ "rstrip": false,
665
+ "single_word": false,
666
+ "special": true
667
+ },
668
+ "151726": {
669
+ "content": "|<EXTRA_TOKENS_80>|",
670
+ "lstrip": false,
671
+ "normalized": false,
672
+ "rstrip": false,
673
+ "single_word": false,
674
+ "special": true
675
+ },
676
+ "151727": {
677
+ "content": "|<EXTRA_TOKENS_81>|",
678
+ "lstrip": false,
679
+ "normalized": false,
680
+ "rstrip": false,
681
+ "single_word": false,
682
+ "special": true
683
+ },
684
+ "151728": {
685
+ "content": "|<EXTRA_TOKENS_82>|",
686
+ "lstrip": false,
687
+ "normalized": false,
688
+ "rstrip": false,
689
+ "single_word": false,
690
+ "special": true
691
+ },
692
+ "151729": {
693
+ "content": "|<EXTRA_TOKENS_83>|",
694
+ "lstrip": false,
695
+ "normalized": false,
696
+ "rstrip": false,
697
+ "single_word": false,
698
+ "special": true
699
+ },
700
+ "151730": {
701
+ "content": "|<EXTRA_TOKENS_84>|",
702
+ "lstrip": false,
703
+ "normalized": false,
704
+ "rstrip": false,
705
+ "single_word": false,
706
+ "special": true
707
+ },
708
+ "151731": {
709
+ "content": "|<EXTRA_TOKENS_85>|",
710
+ "lstrip": false,
711
+ "normalized": false,
712
+ "rstrip": false,
713
+ "single_word": false,
714
+ "special": true
715
+ },
716
+ "151732": {
717
+ "content": "|<EXTRA_TOKENS_86>|",
718
+ "lstrip": false,
719
+ "normalized": false,
720
+ "rstrip": false,
721
+ "single_word": false,
722
+ "special": true
723
+ },
724
+ "151733": {
725
+ "content": "|<EXTRA_TOKENS_87>|",
726
+ "lstrip": false,
727
+ "normalized": false,
728
+ "rstrip": false,
729
+ "single_word": false,
730
+ "special": true
731
+ },
732
+ "151734": {
733
+ "content": "|<EXTRA_TOKENS_88>|",
734
+ "lstrip": false,
735
+ "normalized": false,
736
+ "rstrip": false,
737
+ "single_word": false,
738
+ "special": true
739
+ },
740
+ "151735": {
741
+ "content": "|<EXTRA_TOKENS_89>|",
742
+ "lstrip": false,
743
+ "normalized": false,
744
+ "rstrip": false,
745
+ "single_word": false,
746
+ "special": true
747
+ },
748
+ "151736": {
749
+ "content": "|<EXTRA_TOKENS_90>|",
750
+ "lstrip": false,
751
+ "normalized": false,
752
+ "rstrip": false,
753
+ "single_word": false,
754
+ "special": true
755
+ },
756
+ "151737": {
757
+ "content": "|<EXTRA_TOKENS_91>|",
758
+ "lstrip": false,
759
+ "normalized": false,
760
+ "rstrip": false,
761
+ "single_word": false,
762
+ "special": true
763
+ },
764
+ "151738": {
765
+ "content": "|<EXTRA_TOKENS_92>|",
766
+ "lstrip": false,
767
+ "normalized": false,
768
+ "rstrip": false,
769
+ "single_word": false,
770
+ "special": true
771
+ },
772
+ "151739": {
773
+ "content": "|<EXTRA_TOKENS_93>|",
774
+ "lstrip": false,
775
+ "normalized": false,
776
+ "rstrip": false,
777
+ "single_word": false,
778
+ "special": true
779
+ },
780
+ "151740": {
781
+ "content": "|<EXTRA_TOKENS_94>|",
782
+ "lstrip": false,
783
+ "normalized": false,
784
+ "rstrip": false,
785
+ "single_word": false,
786
+ "special": true
787
+ },
788
+ "151741": {
789
+ "content": "|<EXTRA_TOKENS_95>|",
790
+ "lstrip": false,
791
+ "normalized": false,
792
+ "rstrip": false,
793
+ "single_word": false,
794
+ "special": true
795
+ },
796
+ "151742": {
797
+ "content": "|<EXTRA_TOKENS_96>|",
798
+ "lstrip": false,
799
+ "normalized": false,
800
+ "rstrip": false,
801
+ "single_word": false,
802
+ "special": true
803
+ },
804
+ "151743": {
805
+ "content": "|<EXTRA_TOKENS_97>|",
806
+ "lstrip": false,
807
+ "normalized": false,
808
+ "rstrip": false,
809
+ "single_word": false,
810
+ "special": true
811
+ },
812
+ "151744": {
813
+ "content": "|<EXTRA_TOKENS_98>|",
814
+ "lstrip": false,
815
+ "normalized": false,
816
+ "rstrip": false,
817
+ "single_word": false,
818
+ "special": true
819
+ },
820
+ "151745": {
821
+ "content": "|<EXTRA_TOKENS_99>|",
822
+ "lstrip": false,
823
+ "normalized": false,
824
+ "rstrip": false,
825
+ "single_word": false,
826
+ "special": true
827
+ },
828
+ "151746": {
829
+ "content": "|<EXTRA_TOKENS_100>|",
830
+ "lstrip": false,
831
+ "normalized": false,
832
+ "rstrip": false,
833
+ "single_word": false,
834
+ "special": true
835
+ },
836
+ "151747": {
837
+ "content": "|<EXTRA_TOKENS_101>|",
838
+ "lstrip": false,
839
+ "normalized": false,
840
+ "rstrip": false,
841
+ "single_word": false,
842
+ "special": true
843
+ },
844
+ "151748": {
845
+ "content": "|<EXTRA_TOKENS_102>|",
846
+ "lstrip": false,
847
+ "normalized": false,
848
+ "rstrip": false,
849
+ "single_word": false,
850
+ "special": true
851
+ },
852
+ "151749": {
853
+ "content": "|<EXTRA_TOKENS_103>|",
854
+ "lstrip": false,
855
+ "normalized": false,
856
+ "rstrip": false,
857
+ "single_word": false,
858
+ "special": true
859
+ },
860
+ "151750": {
861
+ "content": "|<EXTRA_TOKENS_104>|",
862
+ "lstrip": false,
863
+ "normalized": false,
864
+ "rstrip": false,
865
+ "single_word": false,
866
+ "special": true
867
+ },
868
+ "151751": {
869
+ "content": "|<EXTRA_TOKENS_105>|",
870
+ "lstrip": false,
871
+ "normalized": false,
872
+ "rstrip": false,
873
+ "single_word": false,
874
+ "special": true
875
+ },
876
+ "151752": {
877
+ "content": "|<EXTRA_TOKENS_106>|",
878
+ "lstrip": false,
879
+ "normalized": false,
880
+ "rstrip": false,
881
+ "single_word": false,
882
+ "special": true
883
+ },
884
+ "151753": {
885
+ "content": "|<EXTRA_TOKENS_107>|",
886
+ "lstrip": false,
887
+ "normalized": false,
888
+ "rstrip": false,
889
+ "single_word": false,
890
+ "special": true
891
+ },
892
+ "151754": {
893
+ "content": "|<EXTRA_TOKENS_108>|",
894
+ "lstrip": false,
895
+ "normalized": false,
896
+ "rstrip": false,
897
+ "single_word": false,
898
+ "special": true
899
+ },
900
+ "151755": {
901
+ "content": "|<EXTRA_TOKENS_109>|",
902
+ "lstrip": false,
903
+ "normalized": false,
904
+ "rstrip": false,
905
+ "single_word": false,
906
+ "special": true
907
+ },
908
+ "151756": {
909
+ "content": "|<EXTRA_TOKENS_110>|",
910
+ "lstrip": false,
911
+ "normalized": false,
912
+ "rstrip": false,
913
+ "single_word": false,
914
+ "special": true
915
+ },
916
+ "151757": {
917
+ "content": "|<EXTRA_TOKENS_111>|",
918
+ "lstrip": false,
919
+ "normalized": false,
920
+ "rstrip": false,
921
+ "single_word": false,
922
+ "special": true
923
+ },
924
+ "151758": {
925
+ "content": "|<EXTRA_TOKENS_112>|",
926
+ "lstrip": false,
927
+ "normalized": false,
928
+ "rstrip": false,
929
+ "single_word": false,
930
+ "special": true
931
+ },
932
+ "151759": {
933
+ "content": "|<EXTRA_TOKENS_113>|",
934
+ "lstrip": false,
935
+ "normalized": false,
936
+ "rstrip": false,
937
+ "single_word": false,
938
+ "special": true
939
+ },
940
+ "151760": {
941
+ "content": "|<EXTRA_TOKENS_114>|",
942
+ "lstrip": false,
943
+ "normalized": false,
944
+ "rstrip": false,
945
+ "single_word": false,
946
+ "special": true
947
+ },
948
+ "151761": {
949
+ "content": "|<EXTRA_TOKENS_115>|",
950
+ "lstrip": false,
951
+ "normalized": false,
952
+ "rstrip": false,
953
+ "single_word": false,
954
+ "special": true
955
+ },
956
+ "151762": {
957
+ "content": "|<EXTRA_TOKENS_116>|",
958
+ "lstrip": false,
959
+ "normalized": false,
960
+ "rstrip": false,
961
+ "single_word": false,
962
+ "special": true
963
+ },
964
+ "151763": {
965
+ "content": "|<EXTRA_TOKENS_117>|",
966
+ "lstrip": false,
967
+ "normalized": false,
968
+ "rstrip": false,
969
+ "single_word": false,
970
+ "special": true
971
+ },
972
+ "151764": {
973
+ "content": "|<EXTRA_TOKENS_118>|",
974
+ "lstrip": false,
975
+ "normalized": false,
976
+ "rstrip": false,
977
+ "single_word": false,
978
+ "special": true
979
+ },
980
+ "151765": {
981
+ "content": "|<EXTRA_TOKENS_119>|",
982
+ "lstrip": false,
983
+ "normalized": false,
984
+ "rstrip": false,
985
+ "single_word": false,
986
+ "special": true
987
+ },
988
+ "151766": {
989
+ "content": "|<EXTRA_TOKENS_120>|",
990
+ "lstrip": false,
991
+ "normalized": false,
992
+ "rstrip": false,
993
+ "single_word": false,
994
+ "special": true
995
+ },
996
+ "151767": {
997
+ "content": "|<EXTRA_TOKENS_121>|",
998
+ "lstrip": false,
999
+ "normalized": false,
1000
+ "rstrip": false,
1001
+ "single_word": false,
1002
+ "special": true
1003
+ },
1004
+ "151768": {
1005
+ "content": "|<EXTRA_TOKENS_122>|",
1006
+ "lstrip": false,
1007
+ "normalized": false,
1008
+ "rstrip": false,
1009
+ "single_word": false,
1010
+ "special": true
1011
+ },
1012
+ "151769": {
1013
+ "content": "|<EXTRA_TOKENS_123>|",
1014
+ "lstrip": false,
1015
+ "normalized": false,
1016
+ "rstrip": false,
1017
+ "single_word": false,
1018
+ "special": true
1019
+ },
1020
+ "151770": {
1021
+ "content": "|<EXTRA_TOKENS_124>|",
1022
+ "lstrip": false,
1023
+ "normalized": false,
1024
+ "rstrip": false,
1025
+ "single_word": false,
1026
+ "special": true
1027
+ },
1028
+ "151771": {
1029
+ "content": "|<EXTRA_TOKENS_125>|",
1030
+ "lstrip": false,
1031
+ "normalized": false,
1032
+ "rstrip": false,
1033
+ "single_word": false,
1034
+ "special": true
1035
+ },
1036
+ "151772": {
1037
+ "content": "|<EXTRA_TOKENS_126>|",
1038
+ "lstrip": false,
1039
+ "normalized": false,
1040
+ "rstrip": false,
1041
+ "single_word": false,
1042
+ "special": true
1043
+ },
1044
+ "151773": {
1045
+ "content": "|<EXTRA_TOKENS_127>|",
1046
+ "lstrip": false,
1047
+ "normalized": false,
1048
+ "rstrip": false,
1049
+ "single_word": false,
1050
+ "special": true
1051
+ },
1052
+ "151774": {
1053
+ "content": "|<EXTRA_TOKENS_128>|",
1054
+ "lstrip": false,
1055
+ "normalized": false,
1056
+ "rstrip": false,
1057
+ "single_word": false,
1058
+ "special": true
1059
+ },
1060
+ "151775": {
1061
+ "content": "|<EXTRA_TOKENS_129>|",
1062
+ "lstrip": false,
1063
+ "normalized": false,
1064
+ "rstrip": false,
1065
+ "single_word": false,
1066
+ "special": true
1067
+ },
1068
+ "151776": {
1069
+ "content": "|<EXTRA_TOKENS_130>|",
1070
+ "lstrip": false,
1071
+ "normalized": false,
1072
+ "rstrip": false,
1073
+ "single_word": false,
1074
+ "special": true
1075
+ },
1076
+ "151777": {
1077
+ "content": "|<EXTRA_TOKENS_131>|",
1078
+ "lstrip": false,
1079
+ "normalized": false,
1080
+ "rstrip": false,
1081
+ "single_word": false,
1082
+ "special": true
1083
+ },
1084
+ "151778": {
1085
+ "content": "|<EXTRA_TOKENS_132>|",
1086
+ "lstrip": false,
1087
+ "normalized": false,
1088
+ "rstrip": false,
1089
+ "single_word": false,
1090
+ "special": true
1091
+ },
1092
+ "151779": {
1093
+ "content": "|<EXTRA_TOKENS_133>|",
1094
+ "lstrip": false,
1095
+ "normalized": false,
1096
+ "rstrip": false,
1097
+ "single_word": false,
1098
+ "special": true
1099
+ },
1100
+ "151780": {
1101
+ "content": "|<EXTRA_TOKENS_134>|",
1102
+ "lstrip": false,
1103
+ "normalized": false,
1104
+ "rstrip": false,
1105
+ "single_word": false,
1106
+ "special": true
1107
+ },
1108
+ "151781": {
1109
+ "content": "|<EXTRA_TOKENS_135>|",
1110
+ "lstrip": false,
1111
+ "normalized": false,
1112
+ "rstrip": false,
1113
+ "single_word": false,
1114
+ "special": true
1115
+ },
1116
+ "151782": {
1117
+ "content": "|<EXTRA_TOKENS_136>|",
1118
+ "lstrip": false,
1119
+ "normalized": false,
1120
+ "rstrip": false,
1121
+ "single_word": false,
1122
+ "special": true
1123
+ },
1124
+ "151783": {
1125
+ "content": "|<EXTRA_TOKENS_137>|",
1126
+ "lstrip": false,
1127
+ "normalized": false,
1128
+ "rstrip": false,
1129
+ "single_word": false,
1130
+ "special": true
1131
+ },
1132
+ "151784": {
1133
+ "content": "|<EXTRA_TOKENS_138>|",
1134
+ "lstrip": false,
1135
+ "normalized": false,
1136
+ "rstrip": false,
1137
+ "single_word": false,
1138
+ "special": true
1139
+ },
1140
+ "151785": {
1141
+ "content": "|<EXTRA_TOKENS_139>|",
1142
+ "lstrip": false,
1143
+ "normalized": false,
1144
+ "rstrip": false,
1145
+ "single_word": false,
1146
+ "special": true
1147
+ },
1148
+ "151786": {
1149
+ "content": "|<EXTRA_TOKENS_140>|",
1150
+ "lstrip": false,
1151
+ "normalized": false,
1152
+ "rstrip": false,
1153
+ "single_word": false,
1154
+ "special": true
1155
+ },
1156
+ "151787": {
1157
+ "content": "|<EXTRA_TOKENS_141>|",
1158
+ "lstrip": false,
1159
+ "normalized": false,
1160
+ "rstrip": false,
1161
+ "single_word": false,
1162
+ "special": true
1163
+ },
1164
+ "151788": {
1165
+ "content": "|<EXTRA_TOKENS_142>|",
1166
+ "lstrip": false,
1167
+ "normalized": false,
1168
+ "rstrip": false,
1169
+ "single_word": false,
1170
+ "special": true
1171
+ },
1172
+ "151789": {
1173
+ "content": "|<EXTRA_TOKENS_143>|",
1174
+ "lstrip": false,
1175
+ "normalized": false,
1176
+ "rstrip": false,
1177
+ "single_word": false,
1178
+ "special": true
1179
+ },
1180
+ "151790": {
1181
+ "content": "|<EXTRA_TOKENS_144>|",
1182
+ "lstrip": false,
1183
+ "normalized": false,
1184
+ "rstrip": false,
1185
+ "single_word": false,
1186
+ "special": true
1187
+ },
1188
+ "151791": {
1189
+ "content": "|<EXTRA_TOKENS_145>|",
1190
+ "lstrip": false,
1191
+ "normalized": false,
1192
+ "rstrip": false,
1193
+ "single_word": false,
1194
+ "special": true
1195
+ },
1196
+ "151792": {
1197
+ "content": "|<EXTRA_TOKENS_146>|",
1198
+ "lstrip": false,
1199
+ "normalized": false,
1200
+ "rstrip": false,
1201
+ "single_word": false,
1202
+ "special": true
1203
+ },
1204
+ "151793": {
1205
+ "content": "|<EXTRA_TOKENS_147>|",
1206
+ "lstrip": false,
1207
+ "normalized": false,
1208
+ "rstrip": false,
1209
+ "single_word": false,
1210
+ "special": true
1211
+ },
1212
+ "151794": {
1213
+ "content": "|<EXTRA_TOKENS_148>|",
1214
+ "lstrip": false,
1215
+ "normalized": false,
1216
+ "rstrip": false,
1217
+ "single_word": false,
1218
+ "special": true
1219
+ },
1220
+ "151795": {
1221
+ "content": "|<EXTRA_TOKENS_149>|",
1222
+ "lstrip": false,
1223
+ "normalized": false,
1224
+ "rstrip": false,
1225
+ "single_word": false,
1226
+ "special": true
1227
+ },
1228
+ "151796": {
1229
+ "content": "|<EXTRA_TOKENS_150>|",
1230
+ "lstrip": false,
1231
+ "normalized": false,
1232
+ "rstrip": false,
1233
+ "single_word": false,
1234
+ "special": true
1235
+ },
1236
+ "151797": {
1237
+ "content": "|<EXTRA_TOKENS_151>|",
1238
+ "lstrip": false,
1239
+ "normalized": false,
1240
+ "rstrip": false,
1241
+ "single_word": false,
1242
+ "special": true
1243
+ },
1244
+ "151798": {
1245
+ "content": "|<EXTRA_TOKENS_152>|",
1246
+ "lstrip": false,
1247
+ "normalized": false,
1248
+ "rstrip": false,
1249
+ "single_word": false,
1250
+ "special": true
1251
+ },
1252
+ "151799": {
1253
+ "content": "|<EXTRA_TOKENS_153>|",
1254
+ "lstrip": false,
1255
+ "normalized": false,
1256
+ "rstrip": false,
1257
+ "single_word": false,
1258
+ "special": true
1259
+ },
1260
+ "151800": {
1261
+ "content": "|<EXTRA_TOKENS_154>|",
1262
+ "lstrip": false,
1263
+ "normalized": false,
1264
+ "rstrip": false,
1265
+ "single_word": false,
1266
+ "special": true
1267
+ },
1268
+ "151801": {
1269
+ "content": "|<EXTRA_TOKENS_155>|",
1270
+ "lstrip": false,
1271
+ "normalized": false,
1272
+ "rstrip": false,
1273
+ "single_word": false,
1274
+ "special": true
1275
+ },
1276
+ "151802": {
1277
+ "content": "|<EXTRA_TOKENS_156>|",
1278
+ "lstrip": false,
1279
+ "normalized": false,
1280
+ "rstrip": false,
1281
+ "single_word": false,
1282
+ "special": true
1283
+ },
1284
+ "151803": {
1285
+ "content": "|<EXTRA_TOKENS_157>|",
1286
+ "lstrip": false,
1287
+ "normalized": false,
1288
+ "rstrip": false,
1289
+ "single_word": false,
1290
+ "special": true
1291
+ },
1292
+ "151804": {
1293
+ "content": "|<EXTRA_TOKENS_158>|",
1294
+ "lstrip": false,
1295
+ "normalized": false,
1296
+ "rstrip": false,
1297
+ "single_word": false,
1298
+ "special": true
1299
+ },
1300
+ "151805": {
1301
+ "content": "|<EXTRA_TOKENS_159>|",
1302
+ "lstrip": false,
1303
+ "normalized": false,
1304
+ "rstrip": false,
1305
+ "single_word": false,
1306
+ "special": true
1307
+ },
1308
+ "151806": {
1309
+ "content": "|<EXTRA_TOKENS_160>|",
1310
+ "lstrip": false,
1311
+ "normalized": false,
1312
+ "rstrip": false,
1313
+ "single_word": false,
1314
+ "special": true
1315
+ },
1316
+ "151807": {
1317
+ "content": "|<EXTRA_TOKENS_161>|",
1318
+ "lstrip": false,
1319
+ "normalized": false,
1320
+ "rstrip": false,
1321
+ "single_word": false,
1322
+ "special": true
1323
+ },
1324
+ "151808": {
1325
+ "content": "|<EXTRA_TOKENS_162>|",
1326
+ "lstrip": false,
1327
+ "normalized": false,
1328
+ "rstrip": false,
1329
+ "single_word": false,
1330
+ "special": true
1331
+ },
1332
+ "151809": {
1333
+ "content": "|<EXTRA_TOKENS_163>|",
1334
+ "lstrip": false,
1335
+ "normalized": false,
1336
+ "rstrip": false,
1337
+ "single_word": false,
1338
+ "special": true
1339
+ },
1340
+ "151810": {
1341
+ "content": "|<EXTRA_TOKENS_164>|",
1342
+ "lstrip": false,
1343
+ "normalized": false,
1344
+ "rstrip": false,
1345
+ "single_word": false,
1346
+ "special": true
1347
+ },
1348
+ "151811": {
1349
+ "content": "|<EXTRA_TOKENS_165>|",
1350
+ "lstrip": false,
1351
+ "normalized": false,
1352
+ "rstrip": false,
1353
+ "single_word": false,
1354
+ "special": true
1355
+ },
1356
+ "151812": {
1357
+ "content": "|<EXTRA_TOKENS_166>|",
1358
+ "lstrip": false,
1359
+ "normalized": false,
1360
+ "rstrip": false,
1361
+ "single_word": false,
1362
+ "special": true
1363
+ },
1364
+ "151813": {
1365
+ "content": "|<EXTRA_TOKENS_167>|",
1366
+ "lstrip": false,
1367
+ "normalized": false,
1368
+ "rstrip": false,
1369
+ "single_word": false,
1370
+ "special": true
1371
+ },
1372
+ "151814": {
1373
+ "content": "|<EXTRA_TOKENS_168>|",
1374
+ "lstrip": false,
1375
+ "normalized": false,
1376
+ "rstrip": false,
1377
+ "single_word": false,
1378
+ "special": true
1379
+ },
1380
+ "151815": {
1381
+ "content": "|<EXTRA_TOKENS_169>|",
1382
+ "lstrip": false,
1383
+ "normalized": false,
1384
+ "rstrip": false,
1385
+ "single_word": false,
1386
+ "special": true
1387
+ },
1388
+ "151816": {
1389
+ "content": "|<EXTRA_TOKENS_170>|",
1390
+ "lstrip": false,
1391
+ "normalized": false,
1392
+ "rstrip": false,
1393
+ "single_word": false,
1394
+ "special": true
1395
+ },
1396
+ "151817": {
1397
+ "content": "|<EXTRA_TOKENS_171>|",
1398
+ "lstrip": false,
1399
+ "normalized": false,
1400
+ "rstrip": false,
1401
+ "single_word": false,
1402
+ "special": true
1403
+ },
1404
+ "151818": {
1405
+ "content": "|<EXTRA_TOKENS_172>|",
1406
+ "lstrip": false,
1407
+ "normalized": false,
1408
+ "rstrip": false,
1409
+ "single_word": false,
1410
+ "special": true
1411
+ },
1412
+ "151819": {
1413
+ "content": "|<EXTRA_TOKENS_173>|",
1414
+ "lstrip": false,
1415
+ "normalized": false,
1416
+ "rstrip": false,
1417
+ "single_word": false,
1418
+ "special": true
1419
+ },
1420
+ "151820": {
1421
+ "content": "|<EXTRA_TOKENS_174>|",
1422
+ "lstrip": false,
1423
+ "normalized": false,
1424
+ "rstrip": false,
1425
+ "single_word": false,
1426
+ "special": true
1427
+ },
1428
+ "151821": {
1429
+ "content": "|<EXTRA_TOKENS_175>|",
1430
+ "lstrip": false,
1431
+ "normalized": false,
1432
+ "rstrip": false,
1433
+ "single_word": false,
1434
+ "special": true
1435
+ },
1436
+ "151822": {
1437
+ "content": "|<EXTRA_TOKENS_176>|",
1438
+ "lstrip": false,
1439
+ "normalized": false,
1440
+ "rstrip": false,
1441
+ "single_word": false,
1442
+ "special": true
1443
+ },
1444
+ "151823": {
1445
+ "content": "|<EXTRA_TOKENS_177>|",
1446
+ "lstrip": false,
1447
+ "normalized": false,
1448
+ "rstrip": false,
1449
+ "single_word": false,
1450
+ "special": true
1451
+ },
1452
+ "151824": {
1453
+ "content": "|<EXTRA_TOKENS_178>|",
1454
+ "lstrip": false,
1455
+ "normalized": false,
1456
+ "rstrip": false,
1457
+ "single_word": false,
1458
+ "special": true
1459
+ },
1460
+ "151825": {
1461
+ "content": "|<EXTRA_TOKENS_179>|",
1462
+ "lstrip": false,
1463
+ "normalized": false,
1464
+ "rstrip": false,
1465
+ "single_word": false,
1466
+ "special": true
1467
+ },
1468
+ "151826": {
1469
+ "content": "|<EXTRA_TOKENS_180>|",
1470
+ "lstrip": false,
1471
+ "normalized": false,
1472
+ "rstrip": false,
1473
+ "single_word": false,
1474
+ "special": true
1475
+ },
1476
+ "151827": {
1477
+ "content": "|<EXTRA_TOKENS_181>|",
1478
+ "lstrip": false,
1479
+ "normalized": false,
1480
+ "rstrip": false,
1481
+ "single_word": false,
1482
+ "special": true
1483
+ },
1484
+ "151828": {
1485
+ "content": "|<EXTRA_TOKENS_182>|",
1486
+ "lstrip": false,
1487
+ "normalized": false,
1488
+ "rstrip": false,
1489
+ "single_word": false,
1490
+ "special": true
1491
+ },
1492
+ "151829": {
1493
+ "content": "|<EXTRA_TOKENS_183>|",
1494
+ "lstrip": false,
1495
+ "normalized": false,
1496
+ "rstrip": false,
1497
+ "single_word": false,
1498
+ "special": true
1499
+ },
1500
+ "151830": {
1501
+ "content": "|<EXTRA_TOKENS_184>|",
1502
+ "lstrip": false,
1503
+ "normalized": false,
1504
+ "rstrip": false,
1505
+ "single_word": false,
1506
+ "special": true
1507
+ },
1508
+ "151831": {
1509
+ "content": "|<EXTRA_TOKENS_185>|",
1510
+ "lstrip": false,
1511
+ "normalized": false,
1512
+ "rstrip": false,
1513
+ "single_word": false,
1514
+ "special": true
1515
+ },
1516
+ "151832": {
1517
+ "content": "|<EXTRA_TOKENS_186>|",
1518
+ "lstrip": false,
1519
+ "normalized": false,
1520
+ "rstrip": false,
1521
+ "single_word": false,
1522
+ "special": true
1523
+ },
1524
+ "151833": {
1525
+ "content": "|<EXTRA_TOKENS_187>|",
1526
+ "lstrip": false,
1527
+ "normalized": false,
1528
+ "rstrip": false,
1529
+ "single_word": false,
1530
+ "special": true
1531
+ },
1532
+ "151834": {
1533
+ "content": "|<EXTRA_TOKENS_188>|",
1534
+ "lstrip": false,
1535
+ "normalized": false,
1536
+ "rstrip": false,
1537
+ "single_word": false,
1538
+ "special": true
1539
+ },
1540
+ "151835": {
1541
+ "content": "|<EXTRA_TOKENS_189>|",
1542
+ "lstrip": false,
1543
+ "normalized": false,
1544
+ "rstrip": false,
1545
+ "single_word": false,
1546
+ "special": true
1547
+ },
1548
+ "151836": {
1549
+ "content": "|<EXTRA_TOKENS_190>|",
1550
+ "lstrip": false,
1551
+ "normalized": false,
1552
+ "rstrip": false,
1553
+ "single_word": false,
1554
+ "special": true
1555
+ },
1556
+ "151837": {
1557
+ "content": "|<EXTRA_TOKENS_191>|",
1558
+ "lstrip": false,
1559
+ "normalized": false,
1560
+ "rstrip": false,
1561
+ "single_word": false,
1562
+ "special": true
1563
+ },
1564
+ "151838": {
1565
+ "content": "|<EXTRA_TOKENS_192>|",
1566
+ "lstrip": false,
1567
+ "normalized": false,
1568
+ "rstrip": false,
1569
+ "single_word": false,
1570
+ "special": true
1571
+ },
1572
+ "151839": {
1573
+ "content": "|<EXTRA_TOKENS_193>|",
1574
+ "lstrip": false,
1575
+ "normalized": false,
1576
+ "rstrip": false,
1577
+ "single_word": false,
1578
+ "special": true
1579
+ },
1580
+ "151840": {
1581
+ "content": "|<EXTRA_TOKENS_194>|",
1582
+ "lstrip": false,
1583
+ "normalized": false,
1584
+ "rstrip": false,
1585
+ "single_word": false,
1586
+ "special": true
1587
+ },
1588
+ "151841": {
1589
+ "content": "|<EXTRA_TOKENS_195>|",
1590
+ "lstrip": false,
1591
+ "normalized": false,
1592
+ "rstrip": false,
1593
+ "single_word": false,
1594
+ "special": true
1595
+ },
1596
+ "151842": {
1597
+ "content": "|<EXTRA_TOKENS_196>|",
1598
+ "lstrip": false,
1599
+ "normalized": false,
1600
+ "rstrip": false,
1601
+ "single_word": false,
1602
+ "special": true
1603
+ },
1604
+ "151843": {
1605
+ "content": "|<EXTRA_TOKENS_197>|",
1606
+ "lstrip": false,
1607
+ "normalized": false,
1608
+ "rstrip": false,
1609
+ "single_word": false,
1610
+ "special": true
1611
+ },
1612
+ "151844": {
1613
+ "content": "|<EXTRA_TOKENS_198>|",
1614
+ "lstrip": false,
1615
+ "normalized": false,
1616
+ "rstrip": false,
1617
+ "single_word": false,
1618
+ "special": true
1619
+ },
1620
+ "151845": {
1621
+ "content": "|<EXTRA_TOKENS_199>|",
1622
+ "lstrip": false,
1623
+ "normalized": false,
1624
+ "rstrip": false,
1625
+ "single_word": false,
1626
+ "special": true
1627
+ },
1628
+ "151846": {
1629
+ "content": "|<EXTRA_TOKENS_200>|",
1630
+ "lstrip": false,
1631
+ "normalized": false,
1632
+ "rstrip": false,
1633
+ "single_word": false,
1634
+ "special": true
1635
+ },
1636
+ "151847": {
1637
+ "content": "|<EXTRA_TOKENS_201>|",
1638
+ "lstrip": false,
1639
+ "normalized": false,
1640
+ "rstrip": false,
1641
+ "single_word": false,
1642
+ "special": true
1643
+ },
1644
+ "151848": {
1645
+ "content": "|<EXTRA_TOKENS_202>|",
1646
+ "lstrip": false,
1647
+ "normalized": false,
1648
+ "rstrip": false,
1649
+ "single_word": false,
1650
+ "special": true
1651
+ },
1652
+ "151849": {
1653
+ "content": "|<EXTRA_TOKENS_203>|",
1654
+ "lstrip": false,
1655
+ "normalized": false,
1656
+ "rstrip": false,
1657
+ "single_word": false,
1658
+ "special": true
1659
+ },
1660
+ "151850": {
1661
+ "content": "|<EXTRA_TOKENS_204>|",
1662
+ "lstrip": false,
1663
+ "normalized": false,
1664
+ "rstrip": false,
1665
+ "single_word": false,
1666
+ "special": true
1667
+ },
1668
+ "151851": {
1669
+ "content": "|<EXTRA_TOKENS_205>|",
1670
+ "lstrip": false,
1671
+ "normalized": false,
1672
+ "rstrip": false,
1673
+ "single_word": false,
1674
+ "special": true
1675
+ },
1676
+ "151852": {
1677
+ "content": "|<EXTRA_TOKENS_206>|",
1678
+ "lstrip": false,
1679
+ "normalized": false,
1680
+ "rstrip": false,
1681
+ "single_word": false,
1682
+ "special": true
1683
+ },
1684
+ "151853": {
1685
+ "content": "|<EXTRA_TOKENS_207>|",
1686
+ "lstrip": false,
1687
+ "normalized": false,
1688
+ "rstrip": false,
1689
+ "single_word": false,
1690
+ "special": true
1691
+ },
1692
+ "151854": {
1693
+ "content": "|<EXTRA_TOKENS_208>|",
1694
+ "lstrip": false,
1695
+ "normalized": false,
1696
+ "rstrip": false,
1697
+ "single_word": false,
1698
+ "special": true
1699
+ },
1700
+ "151855": {
1701
+ "content": "|<EXTRA_TOKENS_209>|",
1702
+ "lstrip": false,
1703
+ "normalized": false,
1704
+ "rstrip": false,
1705
+ "single_word": false,
1706
+ "special": true
1707
+ },
1708
+ "151856": {
1709
+ "content": "|<EXTRA_TOKENS_210>|",
1710
+ "lstrip": false,
1711
+ "normalized": false,
1712
+ "rstrip": false,
1713
+ "single_word": false,
1714
+ "special": true
1715
+ },
1716
+ "151857": {
1717
+ "content": "|<EXTRA_TOKENS_211>|",
1718
+ "lstrip": false,
1719
+ "normalized": false,
1720
+ "rstrip": false,
1721
+ "single_word": false,
1722
+ "special": true
1723
+ },
1724
+ "151858": {
1725
+ "content": "|<EXTRA_TOKENS_212>|",
1726
+ "lstrip": false,
1727
+ "normalized": false,
1728
+ "rstrip": false,
1729
+ "single_word": false,
1730
+ "special": true
1731
+ },
1732
+ "151859": {
1733
+ "content": "|<EXTRA_TOKENS_213>|",
1734
+ "lstrip": false,
1735
+ "normalized": false,
1736
+ "rstrip": false,
1737
+ "single_word": false,
1738
+ "special": true
1739
+ },
1740
+ "151860": {
1741
+ "content": "|<EXTRA_TOKENS_214>|",
1742
+ "lstrip": false,
1743
+ "normalized": false,
1744
+ "rstrip": false,
1745
+ "single_word": false,
1746
+ "special": true
1747
+ },
1748
+ "151861": {
1749
+ "content": "|<EXTRA_TOKENS_215>|",
1750
+ "lstrip": false,
1751
+ "normalized": false,
1752
+ "rstrip": false,
1753
+ "single_word": false,
1754
+ "special": true
1755
+ },
1756
+ "151862": {
1757
+ "content": "|<EXTRA_TOKENS_216>|",
1758
+ "lstrip": false,
1759
+ "normalized": false,
1760
+ "rstrip": false,
1761
+ "single_word": false,
1762
+ "special": true
1763
+ },
1764
+ "151863": {
1765
+ "content": "|<EXTRA_TOKENS_217>|",
1766
+ "lstrip": false,
1767
+ "normalized": false,
1768
+ "rstrip": false,
1769
+ "single_word": false,
1770
+ "special": true
1771
+ },
1772
+ "151864": {
1773
+ "content": "|<EXTRA_TOKENS_218>|",
1774
+ "lstrip": false,
1775
+ "normalized": false,
1776
+ "rstrip": false,
1777
+ "single_word": false,
1778
+ "special": true
1779
+ },
1780
+ "151865": {
1781
+ "content": "|<EXTRA_TOKENS_219>|",
1782
+ "lstrip": false,
1783
+ "normalized": false,
1784
+ "rstrip": false,
1785
+ "single_word": false,
1786
+ "special": true
1787
+ },
1788
+ "151866": {
1789
+ "content": "|<EXTRA_TOKENS_220>|",
1790
+ "lstrip": false,
1791
+ "normalized": false,
1792
+ "rstrip": false,
1793
+ "single_word": false,
1794
+ "special": true
1795
+ },
1796
+ "151867": {
1797
+ "content": "|<EXTRA_TOKENS_221>|",
1798
+ "lstrip": false,
1799
+ "normalized": false,
1800
+ "rstrip": false,
1801
+ "single_word": false,
1802
+ "special": true
1803
+ },
1804
+ "151868": {
1805
+ "content": "|<EXTRA_TOKENS_222>|",
1806
+ "lstrip": false,
1807
+ "normalized": false,
1808
+ "rstrip": false,
1809
+ "single_word": false,
1810
+ "special": true
1811
+ },
1812
+ "151869": {
1813
+ "content": "|<EXTRA_TOKENS_223>|",
1814
+ "lstrip": false,
1815
+ "normalized": false,
1816
+ "rstrip": false,
1817
+ "single_word": false,
1818
+ "special": true
1819
+ },
1820
+ "151870": {
1821
+ "content": "|<EXTRA_TOKENS_224>|",
1822
+ "lstrip": false,
1823
+ "normalized": false,
1824
+ "rstrip": false,
1825
+ "single_word": false,
1826
+ "special": true
1827
+ },
1828
+ "151871": {
1829
+ "content": "|<EXTRA_TOKENS_225>|",
1830
+ "lstrip": false,
1831
+ "normalized": false,
1832
+ "rstrip": false,
1833
+ "single_word": false,
1834
+ "special": true
1835
+ },
1836
+ "151872": {
1837
+ "content": "|<EXTRA_TOKENS_226>|",
1838
+ "lstrip": false,
1839
+ "normalized": false,
1840
+ "rstrip": false,
1841
+ "single_word": false,
1842
+ "special": true
1843
+ },
1844
+ "151873": {
1845
+ "content": "|<EXTRA_TOKENS_227>|",
1846
+ "lstrip": false,
1847
+ "normalized": false,
1848
+ "rstrip": false,
1849
+ "single_word": false,
1850
+ "special": true
1851
+ },
1852
+ "151874": {
1853
+ "content": "|<EXTRA_TOKENS_228>|",
1854
+ "lstrip": false,
1855
+ "normalized": false,
1856
+ "rstrip": false,
1857
+ "single_word": false,
1858
+ "special": true
1859
+ },
1860
+ "151875": {
1861
+ "content": "|<EXTRA_TOKENS_229>|",
1862
+ "lstrip": false,
1863
+ "normalized": false,
1864
+ "rstrip": false,
1865
+ "single_word": false,
1866
+ "special": true
1867
+ },
1868
+ "151876": {
1869
+ "content": "|<EXTRA_TOKENS_230>|",
1870
+ "lstrip": false,
1871
+ "normalized": false,
1872
+ "rstrip": false,
1873
+ "single_word": false,
1874
+ "special": true
1875
+ },
1876
+ "151877": {
1877
+ "content": "|<EXTRA_TOKENS_231>|",
1878
+ "lstrip": false,
1879
+ "normalized": false,
1880
+ "rstrip": false,
1881
+ "single_word": false,
1882
+ "special": true
1883
+ },
1884
+ "151878": {
1885
+ "content": "|<EXTRA_TOKENS_232>|",
1886
+ "lstrip": false,
1887
+ "normalized": false,
1888
+ "rstrip": false,
1889
+ "single_word": false,
1890
+ "special": true
1891
+ },
1892
+ "151879": {
1893
+ "content": "|<EXTRA_TOKENS_233>|",
1894
+ "lstrip": false,
1895
+ "normalized": false,
1896
+ "rstrip": false,
1897
+ "single_word": false,
1898
+ "special": true
1899
+ },
1900
+ "151880": {
1901
+ "content": "|<EXTRA_TOKENS_234>|",
1902
+ "lstrip": false,
1903
+ "normalized": false,
1904
+ "rstrip": false,
1905
+ "single_word": false,
1906
+ "special": true
1907
+ },
1908
+ "151881": {
1909
+ "content": "|<EXTRA_TOKENS_235>|",
1910
+ "lstrip": false,
1911
+ "normalized": false,
1912
+ "rstrip": false,
1913
+ "single_word": false,
1914
+ "special": true
1915
+ },
1916
+ "151882": {
1917
+ "content": "|<EXTRA_TOKENS_236>|",
1918
+ "lstrip": false,
1919
+ "normalized": false,
1920
+ "rstrip": false,
1921
+ "single_word": false,
1922
+ "special": true
1923
+ },
1924
+ "151883": {
1925
+ "content": "|<EXTRA_TOKENS_237>|",
1926
+ "lstrip": false,
1927
+ "normalized": false,
1928
+ "rstrip": false,
1929
+ "single_word": false,
1930
+ "special": true
1931
+ },
1932
+ "151884": {
1933
+ "content": "|<EXTRA_TOKENS_238>|",
1934
+ "lstrip": false,
1935
+ "normalized": false,
1936
+ "rstrip": false,
1937
+ "single_word": false,
1938
+ "special": true
1939
+ },
1940
+ "151885": {
1941
+ "content": "|<EXTRA_TOKENS_239>|",
1942
+ "lstrip": false,
1943
+ "normalized": false,
1944
+ "rstrip": false,
1945
+ "single_word": false,
1946
+ "special": true
1947
+ },
1948
+ "151886": {
1949
+ "content": "|<EXTRA_TOKENS_240>|",
1950
+ "lstrip": false,
1951
+ "normalized": false,
1952
+ "rstrip": false,
1953
+ "single_word": false,
1954
+ "special": true
1955
+ },
1956
+ "151887": {
1957
+ "content": "|<EXTRA_TOKENS_241>|",
1958
+ "lstrip": false,
1959
+ "normalized": false,
1960
+ "rstrip": false,
1961
+ "single_word": false,
1962
+ "special": true
1963
+ },
1964
+ "151888": {
1965
+ "content": "|<EXTRA_TOKENS_242>|",
1966
+ "lstrip": false,
1967
+ "normalized": false,
1968
+ "rstrip": false,
1969
+ "single_word": false,
1970
+ "special": true
1971
+ },
1972
+ "151889": {
1973
+ "content": "|<EXTRA_TOKENS_243>|",
1974
+ "lstrip": false,
1975
+ "normalized": false,
1976
+ "rstrip": false,
1977
+ "single_word": false,
1978
+ "special": true
1979
+ },
1980
+ "151890": {
1981
+ "content": "|<EXTRA_TOKENS_244>|",
1982
+ "lstrip": false,
1983
+ "normalized": false,
1984
+ "rstrip": false,
1985
+ "single_word": false,
1986
+ "special": true
1987
+ },
1988
+ "151891": {
1989
+ "content": "|<EXTRA_TOKENS_245>|",
1990
+ "lstrip": false,
1991
+ "normalized": false,
1992
+ "rstrip": false,
1993
+ "single_word": false,
1994
+ "special": true
1995
+ },
1996
+ "151892": {
1997
+ "content": "|<EXTRA_TOKENS_246>|",
1998
+ "lstrip": false,
1999
+ "normalized": false,
2000
+ "rstrip": false,
2001
+ "single_word": false,
2002
+ "special": true
2003
+ },
2004
+ "151893": {
2005
+ "content": "|<EXTRA_TOKENS_247>|",
2006
+ "lstrip": false,
2007
+ "normalized": false,
2008
+ "rstrip": false,
2009
+ "single_word": false,
2010
+ "special": true
2011
+ },
2012
+ "151894": {
2013
+ "content": "|<EXTRA_TOKENS_248>|",
2014
+ "lstrip": false,
2015
+ "normalized": false,
2016
+ "rstrip": false,
2017
+ "single_word": false,
2018
+ "special": true
2019
+ },
2020
+ "151895": {
2021
+ "content": "|<EXTRA_TOKENS_249>|",
2022
+ "lstrip": false,
2023
+ "normalized": false,
2024
+ "rstrip": false,
2025
+ "single_word": false,
2026
+ "special": true
2027
+ },
2028
+ "151896": {
2029
+ "content": "|<EXTRA_TOKENS_250>|",
2030
+ "lstrip": false,
2031
+ "normalized": false,
2032
+ "rstrip": false,
2033
+ "single_word": false,
2034
+ "special": true
2035
+ },
2036
+ "151897": {
2037
+ "content": "|<EXTRA_TOKENS_251>|",
2038
+ "lstrip": false,
2039
+ "normalized": false,
2040
+ "rstrip": false,
2041
+ "single_word": false,
2042
+ "special": true
2043
+ },
2044
+ "151898": {
2045
+ "content": "|<EXTRA_TOKENS_252>|",
2046
+ "lstrip": false,
2047
+ "normalized": false,
2048
+ "rstrip": false,
2049
+ "single_word": false,
2050
+ "special": true
2051
+ },
2052
+ "151899": {
2053
+ "content": "|<EXTRA_TOKENS_253>|",
2054
+ "lstrip": false,
2055
+ "normalized": false,
2056
+ "rstrip": false,
2057
+ "single_word": false,
2058
+ "special": true
2059
+ },
2060
+ "151900": {
2061
+ "content": "|<EXTRA_TOKENS_254>|",
2062
+ "lstrip": false,
2063
+ "normalized": false,
2064
+ "rstrip": false,
2065
+ "single_word": false,
2066
+ "special": true
2067
+ },
2068
+ "151901": {
2069
+ "content": "|<EXTRA_TOKENS_255>|",
2070
+ "lstrip": false,
2071
+ "normalized": false,
2072
+ "rstrip": false,
2073
+ "single_word": false,
2074
+ "special": true
2075
+ },
2076
+ "151902": {
2077
+ "content": "|<EXTRA_TOKENS_256>|",
2078
+ "lstrip": false,
2079
+ "normalized": false,
2080
+ "rstrip": false,
2081
+ "single_word": false,
2082
+ "special": true
2083
+ },
2084
+ "151903": {
2085
+ "content": "|<EXTRA_TOKENS_257>|",
2086
+ "lstrip": false,
2087
+ "normalized": false,
2088
+ "rstrip": false,
2089
+ "single_word": false,
2090
+ "special": true
2091
+ },
2092
+ "151904": {
2093
+ "content": "|<EXTRA_TOKENS_258>|",
2094
+ "lstrip": false,
2095
+ "normalized": false,
2096
+ "rstrip": false,
2097
+ "single_word": false,
2098
+ "special": true
2099
+ },
2100
+ "151905": {
2101
+ "content": "|<EXTRA_TOKENS_259>|",
2102
+ "lstrip": false,
2103
+ "normalized": false,
2104
+ "rstrip": false,
2105
+ "single_word": false,
2106
+ "special": true
2107
+ },
2108
+ "151906": {
2109
+ "content": "|<EXTRA_TOKENS_260>|",
2110
+ "lstrip": false,
2111
+ "normalized": false,
2112
+ "rstrip": false,
2113
+ "single_word": false,
2114
+ "special": true
2115
+ },
2116
+ "151907": {
2117
+ "content": "|<EXTRA_TOKENS_261>|",
2118
+ "lstrip": false,
2119
+ "normalized": false,
2120
+ "rstrip": false,
2121
+ "single_word": false,
2122
+ "special": true
2123
+ },
2124
+ "151908": {
2125
+ "content": "|<EXTRA_TOKENS_262>|",
2126
+ "lstrip": false,
2127
+ "normalized": false,
2128
+ "rstrip": false,
2129
+ "single_word": false,
2130
+ "special": true
2131
+ },
2132
+ "151909": {
2133
+ "content": "|<EXTRA_TOKENS_263>|",
2134
+ "lstrip": false,
2135
+ "normalized": false,
2136
+ "rstrip": false,
2137
+ "single_word": false,
2138
+ "special": true
2139
+ },
2140
+ "151910": {
2141
+ "content": "|<EXTRA_TOKENS_264>|",
2142
+ "lstrip": false,
2143
+ "normalized": false,
2144
+ "rstrip": false,
2145
+ "single_word": false,
2146
+ "special": true
2147
+ },
2148
+ "151911": {
2149
+ "content": "|<EXTRA_TOKENS_265>|",
2150
+ "lstrip": false,
2151
+ "normalized": false,
2152
+ "rstrip": false,
2153
+ "single_word": false,
2154
+ "special": true
2155
+ },
2156
+ "151912": {
2157
+ "content": "|<EXTRA_TOKENS_266>|",
2158
+ "lstrip": false,
2159
+ "normalized": false,
2160
+ "rstrip": false,
2161
+ "single_word": false,
2162
+ "special": true
2163
+ },
2164
+ "151913": {
2165
+ "content": "|<EXTRA_TOKENS_267>|",
2166
+ "lstrip": false,
2167
+ "normalized": false,
2168
+ "rstrip": false,
2169
+ "single_word": false,
2170
+ "special": true
2171
+ },
2172
+ "151914": {
2173
+ "content": "|<EXTRA_TOKENS_268>|",
2174
+ "lstrip": false,
2175
+ "normalized": false,
2176
+ "rstrip": false,
2177
+ "single_word": false,
2178
+ "special": true
2179
+ },
2180
+ "151915": {
2181
+ "content": "|<EXTRA_TOKENS_269>|",
2182
+ "lstrip": false,
2183
+ "normalized": false,
2184
+ "rstrip": false,
2185
+ "single_word": false,
2186
+ "special": true
2187
+ },
2188
+ "151916": {
2189
+ "content": "|<EXTRA_TOKENS_270>|",
2190
+ "lstrip": false,
2191
+ "normalized": false,
2192
+ "rstrip": false,
2193
+ "single_word": false,
2194
+ "special": true
2195
+ },
2196
+ "151917": {
2197
+ "content": "|<EXTRA_TOKENS_271>|",
2198
+ "lstrip": false,
2199
+ "normalized": false,
2200
+ "rstrip": false,
2201
+ "single_word": false,
2202
+ "special": true
2203
+ },
2204
+ "151918": {
2205
+ "content": "|<EXTRA_TOKENS_272>|",
2206
+ "lstrip": false,
2207
+ "normalized": false,
2208
+ "rstrip": false,
2209
+ "single_word": false,
2210
+ "special": true
2211
+ },
2212
+ "151919": {
2213
+ "content": "|<EXTRA_TOKENS_273>|",
2214
+ "lstrip": false,
2215
+ "normalized": false,
2216
+ "rstrip": false,
2217
+ "single_word": false,
2218
+ "special": true
2219
+ },
2220
+ "151920": {
2221
+ "content": "|<EXTRA_TOKENS_274>|",
2222
+ "lstrip": false,
2223
+ "normalized": false,
2224
+ "rstrip": false,
2225
+ "single_word": false,
2226
+ "special": true
2227
+ },
2228
+ "151921": {
2229
+ "content": "|<EXTRA_TOKENS_275>|",
2230
+ "lstrip": false,
2231
+ "normalized": false,
2232
+ "rstrip": false,
2233
+ "single_word": false,
2234
+ "special": true
2235
+ },
2236
+ "151922": {
2237
+ "content": "|<EXTRA_TOKENS_276>|",
2238
+ "lstrip": false,
2239
+ "normalized": false,
2240
+ "rstrip": false,
2241
+ "single_word": false,
2242
+ "special": true
2243
+ },
2244
+ "151923": {
2245
+ "content": "|<EXTRA_TOKENS_277>|",
2246
+ "lstrip": false,
2247
+ "normalized": false,
2248
+ "rstrip": false,
2249
+ "single_word": false,
2250
+ "special": true
2251
+ },
2252
+ "151924": {
2253
+ "content": "|<EXTRA_TOKENS_278>|",
2254
+ "lstrip": false,
2255
+ "normalized": false,
2256
+ "rstrip": false,
2257
+ "single_word": false,
2258
+ "special": true
2259
+ },
2260
+ "151925": {
2261
+ "content": "|<EXTRA_TOKENS_279>|",
2262
+ "lstrip": false,
2263
+ "normalized": false,
2264
+ "rstrip": false,
2265
+ "single_word": false,
2266
+ "special": true
2267
+ },
2268
+ "151926": {
2269
+ "content": "|<EXTRA_TOKENS_280>|",
2270
+ "lstrip": false,
2271
+ "normalized": false,
2272
+ "rstrip": false,
2273
+ "single_word": false,
2274
+ "special": true
2275
+ },
2276
+ "151927": {
2277
+ "content": "|<EXTRA_TOKENS_281>|",
2278
+ "lstrip": false,
2279
+ "normalized": false,
2280
+ "rstrip": false,
2281
+ "single_word": false,
2282
+ "special": true
2283
+ },
2284
+ "151928": {
2285
+ "content": "|<EXTRA_TOKENS_282>|",
2286
+ "lstrip": false,
2287
+ "normalized": false,
2288
+ "rstrip": false,
2289
+ "single_word": false,
2290
+ "special": true
2291
+ },
2292
+ "151929": {
2293
+ "content": "|<EXTRA_TOKENS_283>|",
2294
+ "lstrip": false,
2295
+ "normalized": false,
2296
+ "rstrip": false,
2297
+ "single_word": false,
2298
+ "special": true
2299
+ },
2300
+ "151930": {
2301
+ "content": "|<EXTRA_TOKENS_284>|",
2302
+ "lstrip": false,
2303
+ "normalized": false,
2304
+ "rstrip": false,
2305
+ "single_word": false,
2306
+ "special": true
2307
+ },
2308
+ "151931": {
2309
+ "content": "|<EXTRA_TOKENS_285>|",
2310
+ "lstrip": false,
2311
+ "normalized": false,
2312
+ "rstrip": false,
2313
+ "single_word": false,
2314
+ "special": true
2315
+ },
2316
+ "151932": {
2317
+ "content": "|<EXTRA_TOKENS_286>|",
2318
+ "lstrip": false,
2319
+ "normalized": false,
2320
+ "rstrip": false,
2321
+ "single_word": false,
2322
+ "special": true
2323
+ },
2324
+ "151933": {
2325
+ "content": "|<EXTRA_TOKENS_287>|",
2326
+ "lstrip": false,
2327
+ "normalized": false,
2328
+ "rstrip": false,
2329
+ "single_word": false,
2330
+ "special": true
2331
+ },
2332
+ "151934": {
2333
+ "content": "|<EXTRA_TOKENS_288>|",
2334
+ "lstrip": false,
2335
+ "normalized": false,
2336
+ "rstrip": false,
2337
+ "single_word": false,
2338
+ "special": true
2339
+ },
2340
+ "151935": {
2341
+ "content": "|<EXTRA_TOKENS_289>|",
2342
+ "lstrip": false,
2343
+ "normalized": false,
2344
+ "rstrip": false,
2345
+ "single_word": false,
2346
+ "special": true
2347
+ },
2348
+ "151936": {
2349
+ "content": "|<EXTRA_TOKENS_290>|",
2350
+ "lstrip": false,
2351
+ "normalized": false,
2352
+ "rstrip": false,
2353
+ "single_word": false,
2354
+ "special": true
2355
+ },
2356
+ "151937": {
2357
+ "content": "|<EXTRA_TOKENS_291>|",
2358
+ "lstrip": false,
2359
+ "normalized": false,
2360
+ "rstrip": false,
2361
+ "single_word": false,
2362
+ "special": true
2363
+ },
2364
+ "151938": {
2365
+ "content": "|<EXTRA_TOKENS_292>|",
2366
+ "lstrip": false,
2367
+ "normalized": false,
2368
+ "rstrip": false,
2369
+ "single_word": false,
2370
+ "special": true
2371
+ },
2372
+ "151939": {
2373
+ "content": "|<EXTRA_TOKENS_293>|",
2374
+ "lstrip": false,
2375
+ "normalized": false,
2376
+ "rstrip": false,
2377
+ "single_word": false,
2378
+ "special": true
2379
+ },
2380
+ "151940": {
2381
+ "content": "|<EXTRA_TOKENS_294>|",
2382
+ "lstrip": false,
2383
+ "normalized": false,
2384
+ "rstrip": false,
2385
+ "single_word": false,
2386
+ "special": true
2387
+ },
2388
+ "151941": {
2389
+ "content": "|<EXTRA_TOKENS_295>|",
2390
+ "lstrip": false,
2391
+ "normalized": false,
2392
+ "rstrip": false,
2393
+ "single_word": false,
2394
+ "special": true
2395
+ },
2396
+ "151942": {
2397
+ "content": "|<EXTRA_TOKENS_296>|",
2398
+ "lstrip": false,
2399
+ "normalized": false,
2400
+ "rstrip": false,
2401
+ "single_word": false,
2402
+ "special": true
2403
+ },
2404
+ "151943": {
2405
+ "content": "|<EXTRA_TOKENS_297>|",
2406
+ "lstrip": false,
2407
+ "normalized": false,
2408
+ "rstrip": false,
2409
+ "single_word": false,
2410
+ "special": true
2411
+ },
2412
+ "151944": {
2413
+ "content": "|<EXTRA_TOKENS_298>|",
2414
+ "lstrip": false,
2415
+ "normalized": false,
2416
+ "rstrip": false,
2417
+ "single_word": false,
2418
+ "special": true
2419
+ },
2420
+ "151945": {
2421
+ "content": "|<EXTRA_TOKENS_299>|",
2422
+ "lstrip": false,
2423
+ "normalized": false,
2424
+ "rstrip": false,
2425
+ "single_word": false,
2426
+ "special": true
2427
+ },
2428
+ "151946": {
2429
+ "content": "|<EXTRA_TOKENS_300>|",
2430
+ "lstrip": false,
2431
+ "normalized": false,
2432
+ "rstrip": false,
2433
+ "single_word": false,
2434
+ "special": true
2435
+ },
2436
+ "151947": {
2437
+ "content": "|<EXTRA_TOKENS_301>|",
2438
+ "lstrip": false,
2439
+ "normalized": false,
2440
+ "rstrip": false,
2441
+ "single_word": false,
2442
+ "special": true
2443
+ },
2444
+ "151948": {
2445
+ "content": "|<EXTRA_TOKENS_302>|",
2446
+ "lstrip": false,
2447
+ "normalized": false,
2448
+ "rstrip": false,
2449
+ "single_word": false,
2450
+ "special": true
2451
+ },
2452
+ "151949": {
2453
+ "content": "|<EXTRA_TOKENS_303>|",
2454
+ "lstrip": false,
2455
+ "normalized": false,
2456
+ "rstrip": false,
2457
+ "single_word": false,
2458
+ "special": true
2459
+ },
2460
+ "151950": {
2461
+ "content": "|<EXTRA_TOKENS_304>|",
2462
+ "lstrip": false,
2463
+ "normalized": false,
2464
+ "rstrip": false,
2465
+ "single_word": false,
2466
+ "special": true
2467
+ },
2468
+ "151951": {
2469
+ "content": "|<EXTRA_TOKENS_305>|",
2470
+ "lstrip": false,
2471
+ "normalized": false,
2472
+ "rstrip": false,
2473
+ "single_word": false,
2474
+ "special": true
2475
+ },
2476
+ "151952": {
2477
+ "content": "|<EXTRA_TOKENS_306>|",
2478
+ "lstrip": false,
2479
+ "normalized": false,
2480
+ "rstrip": false,
2481
+ "single_word": false,
2482
+ "special": true
2483
+ },
2484
+ "151953": {
2485
+ "content": "|<EXTRA_TOKENS_307>|",
2486
+ "lstrip": false,
2487
+ "normalized": false,
2488
+ "rstrip": false,
2489
+ "single_word": false,
2490
+ "special": true
2491
+ },
2492
+ "151954": {
2493
+ "content": "|<EXTRA_TOKENS_308>|",
2494
+ "lstrip": false,
2495
+ "normalized": false,
2496
+ "rstrip": false,
2497
+ "single_word": false,
2498
+ "special": true
2499
+ },
2500
+ "151955": {
2501
+ "content": "|<EXTRA_TOKENS_309>|",
2502
+ "lstrip": false,
2503
+ "normalized": false,
2504
+ "rstrip": false,
2505
+ "single_word": false,
2506
+ "special": true
2507
+ },
2508
+ "151956": {
2509
+ "content": "|<EXTRA_TOKENS_310>|",
2510
+ "lstrip": false,
2511
+ "normalized": false,
2512
+ "rstrip": false,
2513
+ "single_word": false,
2514
+ "special": true
2515
+ },
2516
+ "151957": {
2517
+ "content": "|<EXTRA_TOKENS_311>|",
2518
+ "lstrip": false,
2519
+ "normalized": false,
2520
+ "rstrip": false,
2521
+ "single_word": false,
2522
+ "special": true
2523
+ },
2524
+ "151958": {
2525
+ "content": "|<EXTRA_TOKENS_312>|",
2526
+ "lstrip": false,
2527
+ "normalized": false,
2528
+ "rstrip": false,
2529
+ "single_word": false,
2530
+ "special": true
2531
+ },
2532
+ "151959": {
2533
+ "content": "|<EXTRA_TOKENS_313>|",
2534
+ "lstrip": false,
2535
+ "normalized": false,
2536
+ "rstrip": false,
2537
+ "single_word": false,
2538
+ "special": true
2539
+ },
2540
+ "151960": {
2541
+ "content": "|<EXTRA_TOKENS_314>|",
2542
+ "lstrip": false,
2543
+ "normalized": false,
2544
+ "rstrip": false,
2545
+ "single_word": false,
2546
+ "special": true
2547
+ },
2548
+ "151961": {
2549
+ "content": "|<EXTRA_TOKENS_315>|",
2550
+ "lstrip": false,
2551
+ "normalized": false,
2552
+ "rstrip": false,
2553
+ "single_word": false,
2554
+ "special": true
2555
+ },
2556
+ "151962": {
2557
+ "content": "|<EXTRA_TOKENS_316>|",
2558
+ "lstrip": false,
2559
+ "normalized": false,
2560
+ "rstrip": false,
2561
+ "single_word": false,
2562
+ "special": true
2563
+ },
2564
+ "151963": {
2565
+ "content": "|<EXTRA_TOKENS_317>|",
2566
+ "lstrip": false,
2567
+ "normalized": false,
2568
+ "rstrip": false,
2569
+ "single_word": false,
2570
+ "special": true
2571
+ },
2572
+ "151964": {
2573
+ "content": "|<EXTRA_TOKENS_318>|",
2574
+ "lstrip": false,
2575
+ "normalized": false,
2576
+ "rstrip": false,
2577
+ "single_word": false,
2578
+ "special": true
2579
+ },
2580
+ "151965": {
2581
+ "content": "|<EXTRA_TOKENS_319>|",
2582
+ "lstrip": false,
2583
+ "normalized": false,
2584
+ "rstrip": false,
2585
+ "single_word": false,
2586
+ "special": true
2587
+ },
2588
+ "151966": {
2589
+ "content": "|<EXTRA_TOKENS_320>|",
2590
+ "lstrip": false,
2591
+ "normalized": false,
2592
+ "rstrip": false,
2593
+ "single_word": false,
2594
+ "special": true
2595
+ },
2596
+ "151967": {
2597
+ "content": "|<EXTRA_TOKENS_321>|",
2598
+ "lstrip": false,
2599
+ "normalized": false,
2600
+ "rstrip": false,
2601
+ "single_word": false,
2602
+ "special": true
2603
+ },
2604
+ "151968": {
2605
+ "content": "|<EXTRA_TOKENS_322>|",
2606
+ "lstrip": false,
2607
+ "normalized": false,
2608
+ "rstrip": false,
2609
+ "single_word": false,
2610
+ "special": true
2611
+ },
2612
+ "151969": {
2613
+ "content": "|<EXTRA_TOKENS_323>|",
2614
+ "lstrip": false,
2615
+ "normalized": false,
2616
+ "rstrip": false,
2617
+ "single_word": false,
2618
+ "special": true
2619
+ },
2620
+ "151970": {
2621
+ "content": "|<EXTRA_TOKENS_324>|",
2622
+ "lstrip": false,
2623
+ "normalized": false,
2624
+ "rstrip": false,
2625
+ "single_word": false,
2626
+ "special": true
2627
+ },
2628
+ "151971": {
2629
+ "content": "|<EXTRA_TOKENS_325>|",
2630
+ "lstrip": false,
2631
+ "normalized": false,
2632
+ "rstrip": false,
2633
+ "single_word": false,
2634
+ "special": true
2635
+ },
2636
+ "151972": {
2637
+ "content": "|<EXTRA_TOKENS_326>|",
2638
+ "lstrip": false,
2639
+ "normalized": false,
2640
+ "rstrip": false,
2641
+ "single_word": false,
2642
+ "special": true
2643
+ },
2644
+ "151973": {
2645
+ "content": "|<EXTRA_TOKENS_327>|",
2646
+ "lstrip": false,
2647
+ "normalized": false,
2648
+ "rstrip": false,
2649
+ "single_word": false,
2650
+ "special": true
2651
+ },
2652
+ "151974": {
2653
+ "content": "|<EXTRA_TOKENS_328>|",
2654
+ "lstrip": false,
2655
+ "normalized": false,
2656
+ "rstrip": false,
2657
+ "single_word": false,
2658
+ "special": true
2659
+ },
2660
+ "151975": {
2661
+ "content": "|<EXTRA_TOKENS_329>|",
2662
+ "lstrip": false,
2663
+ "normalized": false,
2664
+ "rstrip": false,
2665
+ "single_word": false,
2666
+ "special": true
2667
+ },
2668
+ "151976": {
2669
+ "content": "|<EXTRA_TOKENS_330>|",
2670
+ "lstrip": false,
2671
+ "normalized": false,
2672
+ "rstrip": false,
2673
+ "single_word": false,
2674
+ "special": true
2675
+ },
2676
+ "151977": {
2677
+ "content": "|<EXTRA_TOKENS_331>|",
2678
+ "lstrip": false,
2679
+ "normalized": false,
2680
+ "rstrip": false,
2681
+ "single_word": false,
2682
+ "special": true
2683
+ },
2684
+ "151978": {
2685
+ "content": "|<EXTRA_TOKENS_332>|",
2686
+ "lstrip": false,
2687
+ "normalized": false,
2688
+ "rstrip": false,
2689
+ "single_word": false,
2690
+ "special": true
2691
+ },
2692
+ "151979": {
2693
+ "content": "|<EXTRA_TOKENS_333>|",
2694
+ "lstrip": false,
2695
+ "normalized": false,
2696
+ "rstrip": false,
2697
+ "single_word": false,
2698
+ "special": true
2699
+ },
2700
+ "151980": {
2701
+ "content": "|<EXTRA_TOKENS_334>|",
2702
+ "lstrip": false,
2703
+ "normalized": false,
2704
+ "rstrip": false,
2705
+ "single_word": false,
2706
+ "special": true
2707
+ },
2708
+ "151981": {
2709
+ "content": "|<EXTRA_TOKENS_335>|",
2710
+ "lstrip": false,
2711
+ "normalized": false,
2712
+ "rstrip": false,
2713
+ "single_word": false,
2714
+ "special": true
2715
+ },
2716
+ "151982": {
2717
+ "content": "|<EXTRA_TOKENS_336>|",
2718
+ "lstrip": false,
2719
+ "normalized": false,
2720
+ "rstrip": false,
2721
+ "single_word": false,
2722
+ "special": true
2723
+ },
2724
+ "151983": {
2725
+ "content": "|<EXTRA_TOKENS_337>|",
2726
+ "lstrip": false,
2727
+ "normalized": false,
2728
+ "rstrip": false,
2729
+ "single_word": false,
2730
+ "special": true
2731
+ },
2732
+ "151984": {
2733
+ "content": "|<EXTRA_TOKENS_338>|",
2734
+ "lstrip": false,
2735
+ "normalized": false,
2736
+ "rstrip": false,
2737
+ "single_word": false,
2738
+ "special": true
2739
+ },
2740
+ "151985": {
2741
+ "content": "|<EXTRA_TOKENS_339>|",
2742
+ "lstrip": false,
2743
+ "normalized": false,
2744
+ "rstrip": false,
2745
+ "single_word": false,
2746
+ "special": true
2747
+ },
2748
+ "151986": {
2749
+ "content": "|<EXTRA_TOKENS_340>|",
2750
+ "lstrip": false,
2751
+ "normalized": false,
2752
+ "rstrip": false,
2753
+ "single_word": false,
2754
+ "special": true
2755
+ },
2756
+ "151987": {
2757
+ "content": "|<EXTRA_TOKENS_341>|",
2758
+ "lstrip": false,
2759
+ "normalized": false,
2760
+ "rstrip": false,
2761
+ "single_word": false,
2762
+ "special": true
2763
+ },
2764
+ "151988": {
2765
+ "content": "|<EXTRA_TOKENS_342>|",
2766
+ "lstrip": false,
2767
+ "normalized": false,
2768
+ "rstrip": false,
2769
+ "single_word": false,
2770
+ "special": true
2771
+ },
2772
+ "151989": {
2773
+ "content": "|<EXTRA_TOKENS_343>|",
2774
+ "lstrip": false,
2775
+ "normalized": false,
2776
+ "rstrip": false,
2777
+ "single_word": false,
2778
+ "special": true
2779
+ },
2780
+ "151990": {
2781
+ "content": "|<EXTRA_TOKENS_344>|",
2782
+ "lstrip": false,
2783
+ "normalized": false,
2784
+ "rstrip": false,
2785
+ "single_word": false,
2786
+ "special": true
2787
+ },
2788
+ "151991": {
2789
+ "content": "|<EXTRA_TOKENS_345>|",
2790
+ "lstrip": false,
2791
+ "normalized": false,
2792
+ "rstrip": false,
2793
+ "single_word": false,
2794
+ "special": true
2795
+ },
2796
+ "151992": {
2797
+ "content": "|<EXTRA_TOKENS_346>|",
2798
+ "lstrip": false,
2799
+ "normalized": false,
2800
+ "rstrip": false,
2801
+ "single_word": false,
2802
+ "special": true
2803
+ },
2804
+ "151993": {
2805
+ "content": "|<EXTRA_TOKENS_347>|",
2806
+ "lstrip": false,
2807
+ "normalized": false,
2808
+ "rstrip": false,
2809
+ "single_word": false,
2810
+ "special": true
2811
+ },
2812
+ "151994": {
2813
+ "content": "|<EXTRA_TOKENS_348>|",
2814
+ "lstrip": false,
2815
+ "normalized": false,
2816
+ "rstrip": false,
2817
+ "single_word": false,
2818
+ "special": true
2819
+ },
2820
+ "151995": {
2821
+ "content": "|<EXTRA_TOKENS_349>|",
2822
+ "lstrip": false,
2823
+ "normalized": false,
2824
+ "rstrip": false,
2825
+ "single_word": false,
2826
+ "special": true
2827
+ },
2828
+ "151996": {
2829
+ "content": "|<EXTRA_TOKENS_350>|",
2830
+ "lstrip": false,
2831
+ "normalized": false,
2832
+ "rstrip": false,
2833
+ "single_word": false,
2834
+ "special": true
2835
+ },
2836
+ "151997": {
2837
+ "content": "|<EXTRA_TOKENS_351>|",
2838
+ "lstrip": false,
2839
+ "normalized": false,
2840
+ "rstrip": false,
2841
+ "single_word": false,
2842
+ "special": true
2843
+ },
2844
+ "151998": {
2845
+ "content": "|<EXTRA_TOKENS_352>|",
2846
+ "lstrip": false,
2847
+ "normalized": false,
2848
+ "rstrip": false,
2849
+ "single_word": false,
2850
+ "special": true
2851
+ },
2852
+ "151999": {
2853
+ "content": "|<EXTRA_TOKENS_353>|",
2854
+ "lstrip": false,
2855
+ "normalized": false,
2856
+ "rstrip": false,
2857
+ "single_word": false,
2858
+ "special": true
2859
+ },
2860
+ "152000": {
2861
+ "content": "|<EXTRA_TOKENS_354>|",
2862
+ "lstrip": false,
2863
+ "normalized": false,
2864
+ "rstrip": false,
2865
+ "single_word": false,
2866
+ "special": true
2867
+ },
2868
+ "152001": {
2869
+ "content": "|<EXTRA_TOKENS_355>|",
2870
+ "lstrip": false,
2871
+ "normalized": false,
2872
+ "rstrip": false,
2873
+ "single_word": false,
2874
+ "special": true
2875
+ },
2876
+ "152002": {
2877
+ "content": "|<EXTRA_TOKENS_356>|",
2878
+ "lstrip": false,
2879
+ "normalized": false,
2880
+ "rstrip": false,
2881
+ "single_word": false,
2882
+ "special": true
2883
+ },
2884
+ "152003": {
2885
+ "content": "|<EXTRA_TOKENS_357>|",
2886
+ "lstrip": false,
2887
+ "normalized": false,
2888
+ "rstrip": false,
2889
+ "single_word": false,
2890
+ "special": true
2891
+ },
2892
+ "152004": {
2893
+ "content": "|<EXTRA_TOKENS_358>|",
2894
+ "lstrip": false,
2895
+ "normalized": false,
2896
+ "rstrip": false,
2897
+ "single_word": false,
2898
+ "special": true
2899
+ },
2900
+ "152005": {
2901
+ "content": "|<EXTRA_TOKENS_359>|",
2902
+ "lstrip": false,
2903
+ "normalized": false,
2904
+ "rstrip": false,
2905
+ "single_word": false,
2906
+ "special": true
2907
+ },
2908
+ "152006": {
2909
+ "content": "|<EXTRA_TOKENS_360>|",
2910
+ "lstrip": false,
2911
+ "normalized": false,
2912
+ "rstrip": false,
2913
+ "single_word": false,
2914
+ "special": true
2915
+ },
2916
+ "152007": {
2917
+ "content": "|<EXTRA_TOKENS_361>|",
2918
+ "lstrip": false,
2919
+ "normalized": false,
2920
+ "rstrip": false,
2921
+ "single_word": false,
2922
+ "special": true
2923
+ },
2924
+ "152008": {
2925
+ "content": "|<EXTRA_TOKENS_362>|",
2926
+ "lstrip": false,
2927
+ "normalized": false,
2928
+ "rstrip": false,
2929
+ "single_word": false,
2930
+ "special": true
2931
+ },
2932
+ "152009": {
2933
+ "content": "|<EXTRA_TOKENS_363>|",
2934
+ "lstrip": false,
2935
+ "normalized": false,
2936
+ "rstrip": false,
2937
+ "single_word": false,
2938
+ "special": true
2939
+ },
2940
+ "152010": {
2941
+ "content": "|<EXTRA_TOKENS_364>|",
2942
+ "lstrip": false,
2943
+ "normalized": false,
2944
+ "rstrip": false,
2945
+ "single_word": false,
2946
+ "special": true
2947
+ },
2948
+ "152011": {
2949
+ "content": "|<EXTRA_TOKENS_365>|",
2950
+ "lstrip": false,
2951
+ "normalized": false,
2952
+ "rstrip": false,
2953
+ "single_word": false,
2954
+ "special": true
2955
+ },
2956
+ "152012": {
2957
+ "content": "|<EXTRA_TOKENS_366>|",
2958
+ "lstrip": false,
2959
+ "normalized": false,
2960
+ "rstrip": false,
2961
+ "single_word": false,
2962
+ "special": true
2963
+ },
2964
+ "152013": {
2965
+ "content": "|<EXTRA_TOKENS_367>|",
2966
+ "lstrip": false,
2967
+ "normalized": false,
2968
+ "rstrip": false,
2969
+ "single_word": false,
2970
+ "special": true
2971
+ },
2972
+ "152014": {
2973
+ "content": "|<EXTRA_TOKENS_368>|",
2974
+ "lstrip": false,
2975
+ "normalized": false,
2976
+ "rstrip": false,
2977
+ "single_word": false,
2978
+ "special": true
2979
+ },
2980
+ "152015": {
2981
+ "content": "|<EXTRA_TOKENS_369>|",
2982
+ "lstrip": false,
2983
+ "normalized": false,
2984
+ "rstrip": false,
2985
+ "single_word": false,
2986
+ "special": true
2987
+ },
2988
+ "152016": {
2989
+ "content": "|<EXTRA_TOKENS_370>|",
2990
+ "lstrip": false,
2991
+ "normalized": false,
2992
+ "rstrip": false,
2993
+ "single_word": false,
2994
+ "special": true
2995
+ },
2996
+ "152017": {
2997
+ "content": "|<EXTRA_TOKENS_371>|",
2998
+ "lstrip": false,
2999
+ "normalized": false,
3000
+ "rstrip": false,
3001
+ "single_word": false,
3002
+ "special": true
3003
+ },
3004
+ "152018": {
3005
+ "content": "|<EXTRA_TOKENS_372>|",
3006
+ "lstrip": false,
3007
+ "normalized": false,
3008
+ "rstrip": false,
3009
+ "single_word": false,
3010
+ "special": true
3011
+ },
3012
+ "152019": {
3013
+ "content": "|<EXTRA_TOKENS_373>|",
3014
+ "lstrip": false,
3015
+ "normalized": false,
3016
+ "rstrip": false,
3017
+ "single_word": false,
3018
+ "special": true
3019
+ },
3020
+ "152020": {
3021
+ "content": "|<EXTRA_TOKENS_374>|",
3022
+ "lstrip": false,
3023
+ "normalized": false,
3024
+ "rstrip": false,
3025
+ "single_word": false,
3026
+ "special": true
3027
+ },
3028
+ "152021": {
3029
+ "content": "|<EXTRA_TOKENS_375>|",
3030
+ "lstrip": false,
3031
+ "normalized": false,
3032
+ "rstrip": false,
3033
+ "single_word": false,
3034
+ "special": true
3035
+ },
3036
+ "152022": {
3037
+ "content": "|<EXTRA_TOKENS_376>|",
3038
+ "lstrip": false,
3039
+ "normalized": false,
3040
+ "rstrip": false,
3041
+ "single_word": false,
3042
+ "special": true
3043
+ },
3044
+ "152023": {
3045
+ "content": "|<EXTRA_TOKENS_377>|",
3046
+ "lstrip": false,
3047
+ "normalized": false,
3048
+ "rstrip": false,
3049
+ "single_word": false,
3050
+ "special": true
3051
+ },
3052
+ "152024": {
3053
+ "content": "|<EXTRA_TOKENS_378>|",
3054
+ "lstrip": false,
3055
+ "normalized": false,
3056
+ "rstrip": false,
3057
+ "single_word": false,
3058
+ "special": true
3059
+ },
3060
+ "152025": {
3061
+ "content": "|<EXTRA_TOKENS_379>|",
3062
+ "lstrip": false,
3063
+ "normalized": false,
3064
+ "rstrip": false,
3065
+ "single_word": false,
3066
+ "special": true
3067
+ },
3068
+ "152026": {
3069
+ "content": "|<EXTRA_TOKENS_380>|",
3070
+ "lstrip": false,
3071
+ "normalized": false,
3072
+ "rstrip": false,
3073
+ "single_word": false,
3074
+ "special": true
3075
+ },
3076
+ "152027": {
3077
+ "content": "|<EXTRA_TOKENS_381>|",
3078
+ "lstrip": false,
3079
+ "normalized": false,
3080
+ "rstrip": false,
3081
+ "single_word": false,
3082
+ "special": true
3083
+ },
3084
+ "152028": {
3085
+ "content": "|<EXTRA_TOKENS_382>|",
3086
+ "lstrip": false,
3087
+ "normalized": false,
3088
+ "rstrip": false,
3089
+ "single_word": false,
3090
+ "special": true
3091
+ },
3092
+ "152029": {
3093
+ "content": "|<EXTRA_TOKENS_383>|",
3094
+ "lstrip": false,
3095
+ "normalized": false,
3096
+ "rstrip": false,
3097
+ "single_word": false,
3098
+ "special": true
3099
+ },
3100
+ "152030": {
3101
+ "content": "|<EXTRA_TOKENS_384>|",
3102
+ "lstrip": false,
3103
+ "normalized": false,
3104
+ "rstrip": false,
3105
+ "single_word": false,
3106
+ "special": true
3107
+ },
3108
+ "152031": {
3109
+ "content": "|<EXTRA_TOKENS_385>|",
3110
+ "lstrip": false,
3111
+ "normalized": false,
3112
+ "rstrip": false,
3113
+ "single_word": false,
3114
+ "special": true
3115
+ },
3116
+ "152032": {
3117
+ "content": "|<EXTRA_TOKENS_386>|",
3118
+ "lstrip": false,
3119
+ "normalized": false,
3120
+ "rstrip": false,
3121
+ "single_word": false,
3122
+ "special": true
3123
+ },
3124
+ "152033": {
3125
+ "content": "|<EXTRA_TOKENS_387>|",
3126
+ "lstrip": false,
3127
+ "normalized": false,
3128
+ "rstrip": false,
3129
+ "single_word": false,
3130
+ "special": true
3131
+ },
3132
+ "152034": {
3133
+ "content": "|<EXTRA_TOKENS_388>|",
3134
+ "lstrip": false,
3135
+ "normalized": false,
3136
+ "rstrip": false,
3137
+ "single_word": false,
3138
+ "special": true
3139
+ },
3140
+ "152035": {
3141
+ "content": "|<EXTRA_TOKENS_389>|",
3142
+ "lstrip": false,
3143
+ "normalized": false,
3144
+ "rstrip": false,
3145
+ "single_word": false,
3146
+ "special": true
3147
+ },
3148
+ "152036": {
3149
+ "content": "|<EXTRA_TOKENS_390>|",
3150
+ "lstrip": false,
3151
+ "normalized": false,
3152
+ "rstrip": false,
3153
+ "single_word": false,
3154
+ "special": true
3155
+ },
3156
+ "152037": {
3157
+ "content": "|<EXTRA_TOKENS_391>|",
3158
+ "lstrip": false,
3159
+ "normalized": false,
3160
+ "rstrip": false,
3161
+ "single_word": false,
3162
+ "special": true
3163
+ },
3164
+ "152038": {
3165
+ "content": "|<EXTRA_TOKENS_392>|",
3166
+ "lstrip": false,
3167
+ "normalized": false,
3168
+ "rstrip": false,
3169
+ "single_word": false,
3170
+ "special": true
3171
+ },
3172
+ "152039": {
3173
+ "content": "|<EXTRA_TOKENS_393>|",
3174
+ "lstrip": false,
3175
+ "normalized": false,
3176
+ "rstrip": false,
3177
+ "single_word": false,
3178
+ "special": true
3179
+ },
3180
+ "152040": {
3181
+ "content": "|<EXTRA_TOKENS_394>|",
3182
+ "lstrip": false,
3183
+ "normalized": false,
3184
+ "rstrip": false,
3185
+ "single_word": false,
3186
+ "special": true
3187
+ },
3188
+ "152041": {
3189
+ "content": "|<EXTRA_TOKENS_395>|",
3190
+ "lstrip": false,
3191
+ "normalized": false,
3192
+ "rstrip": false,
3193
+ "single_word": false,
3194
+ "special": true
3195
+ },
3196
+ "152042": {
3197
+ "content": "|<EXTRA_TOKENS_396>|",
3198
+ "lstrip": false,
3199
+ "normalized": false,
3200
+ "rstrip": false,
3201
+ "single_word": false,
3202
+ "special": true
3203
+ },
3204
+ "152043": {
3205
+ "content": "|<EXTRA_TOKENS_397>|",
3206
+ "lstrip": false,
3207
+ "normalized": false,
3208
+ "rstrip": false,
3209
+ "single_word": false,
3210
+ "special": true
3211
+ },
3212
+ "152044": {
3213
+ "content": "|<EXTRA_TOKENS_398>|",
3214
+ "lstrip": false,
3215
+ "normalized": false,
3216
+ "rstrip": false,
3217
+ "single_word": false,
3218
+ "special": true
3219
+ },
3220
+ "152045": {
3221
+ "content": "|<EXTRA_TOKENS_399>|",
3222
+ "lstrip": false,
3223
+ "normalized": false,
3224
+ "rstrip": false,
3225
+ "single_word": false,
3226
+ "special": true
3227
+ },
3228
+ "152046": {
3229
+ "content": "|<EXTRA_TOKENS_400>|",
3230
+ "lstrip": false,
3231
+ "normalized": false,
3232
+ "rstrip": false,
3233
+ "single_word": false,
3234
+ "special": true
3235
+ },
3236
+ "152047": {
3237
+ "content": "|<EXTRA_TOKENS_401>|",
3238
+ "lstrip": false,
3239
+ "normalized": false,
3240
+ "rstrip": false,
3241
+ "single_word": false,
3242
+ "special": true
3243
+ },
3244
+ "152048": {
3245
+ "content": "|<EXTRA_TOKENS_402>|",
3246
+ "lstrip": false,
3247
+ "normalized": false,
3248
+ "rstrip": false,
3249
+ "single_word": false,
3250
+ "special": true
3251
+ },
3252
+ "152049": {
3253
+ "content": "|<EXTRA_TOKENS_403>|",
3254
+ "lstrip": false,
3255
+ "normalized": false,
3256
+ "rstrip": false,
3257
+ "single_word": false,
3258
+ "special": true
3259
+ },
3260
+ "152050": {
3261
+ "content": "|<EXTRA_TOKENS_404>|",
3262
+ "lstrip": false,
3263
+ "normalized": false,
3264
+ "rstrip": false,
3265
+ "single_word": false,
3266
+ "special": true
3267
+ },
3268
+ "152051": {
3269
+ "content": "|<EXTRA_TOKENS_405>|",
3270
+ "lstrip": false,
3271
+ "normalized": false,
3272
+ "rstrip": false,
3273
+ "single_word": false,
3274
+ "special": true
3275
+ },
3276
+ "152052": {
3277
+ "content": "|<EXTRA_TOKENS_406>|",
3278
+ "lstrip": false,
3279
+ "normalized": false,
3280
+ "rstrip": false,
3281
+ "single_word": false,
3282
+ "special": true
3283
+ },
3284
+ "152053": {
3285
+ "content": "|<EXTRA_TOKENS_407>|",
3286
+ "lstrip": false,
3287
+ "normalized": false,
3288
+ "rstrip": false,
3289
+ "single_word": false,
3290
+ "special": true
3291
+ },
3292
+ "152054": {
3293
+ "content": "|<EXTRA_TOKENS_408>|",
3294
+ "lstrip": false,
3295
+ "normalized": false,
3296
+ "rstrip": false,
3297
+ "single_word": false,
3298
+ "special": true
3299
+ },
3300
+ "152055": {
3301
+ "content": "|<EXTRA_TOKENS_409>|",
3302
+ "lstrip": false,
3303
+ "normalized": false,
3304
+ "rstrip": false,
3305
+ "single_word": false,
3306
+ "special": true
3307
+ },
3308
+ "152056": {
3309
+ "content": "|<EXTRA_TOKENS_410>|",
3310
+ "lstrip": false,
3311
+ "normalized": false,
3312
+ "rstrip": false,
3313
+ "single_word": false,
3314
+ "special": true
3315
+ },
3316
+ "152057": {
3317
+ "content": "|<EXTRA_TOKENS_411>|",
3318
+ "lstrip": false,
3319
+ "normalized": false,
3320
+ "rstrip": false,
3321
+ "single_word": false,
3322
+ "special": true
3323
+ },
3324
+ "152058": {
3325
+ "content": "|<EXTRA_TOKENS_412>|",
3326
+ "lstrip": false,
3327
+ "normalized": false,
3328
+ "rstrip": false,
3329
+ "single_word": false,
3330
+ "special": true
3331
+ },
3332
+ "152059": {
3333
+ "content": "|<EXTRA_TOKENS_413>|",
3334
+ "lstrip": false,
3335
+ "normalized": false,
3336
+ "rstrip": false,
3337
+ "single_word": false,
3338
+ "special": true
3339
+ },
3340
+ "152060": {
3341
+ "content": "|<EXTRA_TOKENS_414>|",
3342
+ "lstrip": false,
3343
+ "normalized": false,
3344
+ "rstrip": false,
3345
+ "single_word": false,
3346
+ "special": true
3347
+ },
3348
+ "152061": {
3349
+ "content": "|<EXTRA_TOKENS_415>|",
3350
+ "lstrip": false,
3351
+ "normalized": false,
3352
+ "rstrip": false,
3353
+ "single_word": false,
3354
+ "special": true
3355
+ },
3356
+ "152062": {
3357
+ "content": "|<EXTRA_TOKENS_416>|",
3358
+ "lstrip": false,
3359
+ "normalized": false,
3360
+ "rstrip": false,
3361
+ "single_word": false,
3362
+ "special": true
3363
+ },
3364
+ "152063": {
3365
+ "content": "|<EXTRA_TOKENS_417>|",
3366
+ "lstrip": false,
3367
+ "normalized": false,
3368
+ "rstrip": false,
3369
+ "single_word": false,
3370
+ "special": true
3371
+ },
3372
+ "152064": {
3373
+ "content": "<im_start>",
3374
+ "lstrip": false,
3375
+ "normalized": false,
3376
+ "rstrip": false,
3377
+ "single_word": false,
3378
+ "special": true
3379
+ },
3380
+ "152065": {
3381
+ "content": "<im_end>",
3382
+ "lstrip": false,
3383
+ "normalized": false,
3384
+ "rstrip": false,
3385
+ "single_word": false,
3386
+ "special": true
3387
+ },
3388
+ "152066": {
3389
+ "content": "<im_patch>",
3390
+ "lstrip": false,
3391
+ "normalized": false,
3392
+ "rstrip": false,
3393
+ "single_word": false,
3394
+ "special": true
3395
+ },
3396
+ "152067": {
3397
+ "content": "<im_col>",
3398
+ "lstrip": false,
3399
+ "normalized": false,
3400
+ "rstrip": false,
3401
+ "single_word": false,
3402
+ "special": true
3403
+ },
3404
+ "152068": {
3405
+ "content": "<|image|>",
3406
+ "lstrip": false,
3407
+ "normalized": false,
3408
+ "rstrip": false,
3409
+ "single_word": false,
3410
+ "special": true
3411
+ }
3412
+ },
3413
+ "additional_special_tokens": [
3414
+ "|<EXTRA_TOKENS_0>|",
3415
+ "|<EXTRA_TOKENS_1>|",
3416
+ "|<EXTRA_TOKENS_2>|",
3417
+ "|<EXTRA_TOKENS_3>|",
3418
+ "|<EXTRA_TOKENS_4>|",
3419
+ "|<EXTRA_TOKENS_5>|",
3420
+ "|<EXTRA_TOKENS_6>|",
3421
+ "|<EXTRA_TOKENS_7>|",
3422
+ "|<EXTRA_TOKENS_8>|",
3423
+ "|<EXTRA_TOKENS_9>|",
3424
+ "|<EXTRA_TOKENS_10>|",
3425
+ "|<EXTRA_TOKENS_11>|",
3426
+ "|<EXTRA_TOKENS_12>|",
3427
+ "|<EXTRA_TOKENS_13>|",
3428
+ "|<EXTRA_TOKENS_14>|",
3429
+ "|<EXTRA_TOKENS_15>|",
3430
+ "|<EXTRA_TOKENS_16>|",
3431
+ "|<EXTRA_TOKENS_17>|",
3432
+ "|<EXTRA_TOKENS_18>|",
3433
+ "|<EXTRA_TOKENS_19>|",
3434
+ "|<EXTRA_TOKENS_20>|",
3435
+ "|<EXTRA_TOKENS_21>|",
3436
+ "|<EXTRA_TOKENS_22>|",
3437
+ "|<EXTRA_TOKENS_23>|",
3438
+ "|<EXTRA_TOKENS_24>|",
3439
+ "|<EXTRA_TOKENS_25>|",
3440
+ "|<EXTRA_TOKENS_26>|",
3441
+ "|<EXTRA_TOKENS_27>|",
3442
+ "|<EXTRA_TOKENS_28>|",
3443
+ "|<EXTRA_TOKENS_29>|",
3444
+ "|<EXTRA_TOKENS_30>|",
3445
+ "|<EXTRA_TOKENS_31>|",
3446
+ "|<EXTRA_TOKENS_32>|",
3447
+ "|<EXTRA_TOKENS_33>|",
3448
+ "|<EXTRA_TOKENS_34>|",
3449
+ "|<EXTRA_TOKENS_35>|",
3450
+ "|<EXTRA_TOKENS_36>|",
3451
+ "|<EXTRA_TOKENS_37>|",
3452
+ "|<EXTRA_TOKENS_38>|",
3453
+ "|<EXTRA_TOKENS_39>|",
3454
+ "|<EXTRA_TOKENS_40>|",
3455
+ "|<EXTRA_TOKENS_41>|",
3456
+ "|<EXTRA_TOKENS_42>|",
3457
+ "|<EXTRA_TOKENS_43>|",
3458
+ "|<EXTRA_TOKENS_44>|",
3459
+ "|<EXTRA_TOKENS_45>|",
3460
+ "|<EXTRA_TOKENS_46>|",
3461
+ "|<EXTRA_TOKENS_47>|",
3462
+ "|<EXTRA_TOKENS_48>|",
3463
+ "|<EXTRA_TOKENS_49>|",
3464
+ "|<EXTRA_TOKENS_50>|",
3465
+ "|<EXTRA_TOKENS_51>|",
3466
+ "|<EXTRA_TOKENS_52>|",
3467
+ "|<EXTRA_TOKENS_53>|",
3468
+ "|<EXTRA_TOKENS_54>|",
3469
+ "|<EXTRA_TOKENS_55>|",
3470
+ "|<EXTRA_TOKENS_56>|",
3471
+ "|<EXTRA_TOKENS_57>|",
3472
+ "|<EXTRA_TOKENS_58>|",
3473
+ "|<EXTRA_TOKENS_59>|",
3474
+ "|<EXTRA_TOKENS_60>|",
3475
+ "|<EXTRA_TOKENS_61>|",
3476
+ "|<EXTRA_TOKENS_62>|",
3477
+ "|<EXTRA_TOKENS_63>|",
3478
+ "|<EXTRA_TOKENS_64>|",
3479
+ "|<EXTRA_TOKENS_65>|",
3480
+ "|<EXTRA_TOKENS_66>|",
3481
+ "|<EXTRA_TOKENS_67>|",
3482
+ "|<EXTRA_TOKENS_68>|",
3483
+ "|<EXTRA_TOKENS_69>|",
3484
+ "|<EXTRA_TOKENS_70>|",
3485
+ "|<EXTRA_TOKENS_71>|",
3486
+ "|<EXTRA_TOKENS_72>|",
3487
+ "|<EXTRA_TOKENS_73>|",
3488
+ "|<EXTRA_TOKENS_74>|",
3489
+ "|<EXTRA_TOKENS_75>|",
3490
+ "|<EXTRA_TOKENS_76>|",
3491
+ "|<EXTRA_TOKENS_77>|",
3492
+ "|<EXTRA_TOKENS_78>|",
3493
+ "|<EXTRA_TOKENS_79>|",
3494
+ "|<EXTRA_TOKENS_80>|",
3495
+ "|<EXTRA_TOKENS_81>|",
3496
+ "|<EXTRA_TOKENS_82>|",
3497
+ "|<EXTRA_TOKENS_83>|",
3498
+ "|<EXTRA_TOKENS_84>|",
3499
+ "|<EXTRA_TOKENS_85>|",
3500
+ "|<EXTRA_TOKENS_86>|",
3501
+ "|<EXTRA_TOKENS_87>|",
3502
+ "|<EXTRA_TOKENS_88>|",
3503
+ "|<EXTRA_TOKENS_89>|",
3504
+ "|<EXTRA_TOKENS_90>|",
3505
+ "|<EXTRA_TOKENS_91>|",
3506
+ "|<EXTRA_TOKENS_92>|",
3507
+ "|<EXTRA_TOKENS_93>|",
3508
+ "|<EXTRA_TOKENS_94>|",
3509
+ "|<EXTRA_TOKENS_95>|",
3510
+ "|<EXTRA_TOKENS_96>|",
3511
+ "|<EXTRA_TOKENS_97>|",
3512
+ "|<EXTRA_TOKENS_98>|",
3513
+ "|<EXTRA_TOKENS_99>|",
3514
+ "|<EXTRA_TOKENS_100>|",
3515
+ "|<EXTRA_TOKENS_101>|",
3516
+ "|<EXTRA_TOKENS_102>|",
3517
+ "|<EXTRA_TOKENS_103>|",
3518
+ "|<EXTRA_TOKENS_104>|",
3519
+ "|<EXTRA_TOKENS_105>|",
3520
+ "|<EXTRA_TOKENS_106>|",
3521
+ "|<EXTRA_TOKENS_107>|",
3522
+ "|<EXTRA_TOKENS_108>|",
3523
+ "|<EXTRA_TOKENS_109>|",
3524
+ "|<EXTRA_TOKENS_110>|",
3525
+ "|<EXTRA_TOKENS_111>|",
3526
+ "|<EXTRA_TOKENS_112>|",
3527
+ "|<EXTRA_TOKENS_113>|",
3528
+ "|<EXTRA_TOKENS_114>|",
3529
+ "|<EXTRA_TOKENS_115>|",
3530
+ "|<EXTRA_TOKENS_116>|",
3531
+ "|<EXTRA_TOKENS_117>|",
3532
+ "|<EXTRA_TOKENS_118>|",
3533
+ "|<EXTRA_TOKENS_119>|",
3534
+ "|<EXTRA_TOKENS_120>|",
3535
+ "|<EXTRA_TOKENS_121>|",
3536
+ "|<EXTRA_TOKENS_122>|",
3537
+ "|<EXTRA_TOKENS_123>|",
3538
+ "|<EXTRA_TOKENS_124>|",
3539
+ "|<EXTRA_TOKENS_125>|",
3540
+ "|<EXTRA_TOKENS_126>|",
3541
+ "|<EXTRA_TOKENS_127>|",
3542
+ "|<EXTRA_TOKENS_128>|",
3543
+ "|<EXTRA_TOKENS_129>|",
3544
+ "|<EXTRA_TOKENS_130>|",
3545
+ "|<EXTRA_TOKENS_131>|",
3546
+ "|<EXTRA_TOKENS_132>|",
3547
+ "|<EXTRA_TOKENS_133>|",
3548
+ "|<EXTRA_TOKENS_134>|",
3549
+ "|<EXTRA_TOKENS_135>|",
3550
+ "|<EXTRA_TOKENS_136>|",
3551
+ "|<EXTRA_TOKENS_137>|",
3552
+ "|<EXTRA_TOKENS_138>|",
3553
+ "|<EXTRA_TOKENS_139>|",
3554
+ "|<EXTRA_TOKENS_140>|",
3555
+ "|<EXTRA_TOKENS_141>|",
3556
+ "|<EXTRA_TOKENS_142>|",
3557
+ "|<EXTRA_TOKENS_143>|",
3558
+ "|<EXTRA_TOKENS_144>|",
3559
+ "|<EXTRA_TOKENS_145>|",
3560
+ "|<EXTRA_TOKENS_146>|",
3561
+ "|<EXTRA_TOKENS_147>|",
3562
+ "|<EXTRA_TOKENS_148>|",
3563
+ "|<EXTRA_TOKENS_149>|",
3564
+ "|<EXTRA_TOKENS_150>|",
3565
+ "|<EXTRA_TOKENS_151>|",
3566
+ "|<EXTRA_TOKENS_152>|",
3567
+ "|<EXTRA_TOKENS_153>|",
3568
+ "|<EXTRA_TOKENS_154>|",
3569
+ "|<EXTRA_TOKENS_155>|",
3570
+ "|<EXTRA_TOKENS_156>|",
3571
+ "|<EXTRA_TOKENS_157>|",
3572
+ "|<EXTRA_TOKENS_158>|",
3573
+ "|<EXTRA_TOKENS_159>|",
3574
+ "|<EXTRA_TOKENS_160>|",
3575
+ "|<EXTRA_TOKENS_161>|",
3576
+ "|<EXTRA_TOKENS_162>|",
3577
+ "|<EXTRA_TOKENS_163>|",
3578
+ "|<EXTRA_TOKENS_164>|",
3579
+ "|<EXTRA_TOKENS_165>|",
3580
+ "|<EXTRA_TOKENS_166>|",
3581
+ "|<EXTRA_TOKENS_167>|",
3582
+ "|<EXTRA_TOKENS_168>|",
3583
+ "|<EXTRA_TOKENS_169>|",
3584
+ "|<EXTRA_TOKENS_170>|",
3585
+ "|<EXTRA_TOKENS_171>|",
3586
+ "|<EXTRA_TOKENS_172>|",
3587
+ "|<EXTRA_TOKENS_173>|",
3588
+ "|<EXTRA_TOKENS_174>|",
3589
+ "|<EXTRA_TOKENS_175>|",
3590
+ "|<EXTRA_TOKENS_176>|",
3591
+ "|<EXTRA_TOKENS_177>|",
3592
+ "|<EXTRA_TOKENS_178>|",
3593
+ "|<EXTRA_TOKENS_179>|",
3594
+ "|<EXTRA_TOKENS_180>|",
3595
+ "|<EXTRA_TOKENS_181>|",
3596
+ "|<EXTRA_TOKENS_182>|",
3597
+ "|<EXTRA_TOKENS_183>|",
3598
+ "|<EXTRA_TOKENS_184>|",
3599
+ "|<EXTRA_TOKENS_185>|",
3600
+ "|<EXTRA_TOKENS_186>|",
3601
+ "|<EXTRA_TOKENS_187>|",
3602
+ "|<EXTRA_TOKENS_188>|",
3603
+ "|<EXTRA_TOKENS_189>|",
3604
+ "|<EXTRA_TOKENS_190>|",
3605
+ "|<EXTRA_TOKENS_191>|",
3606
+ "|<EXTRA_TOKENS_192>|",
3607
+ "|<EXTRA_TOKENS_193>|",
3608
+ "|<EXTRA_TOKENS_194>|",
3609
+ "|<EXTRA_TOKENS_195>|",
3610
+ "|<EXTRA_TOKENS_196>|",
3611
+ "|<EXTRA_TOKENS_197>|",
3612
+ "|<EXTRA_TOKENS_198>|",
3613
+ "|<EXTRA_TOKENS_199>|",
3614
+ "|<EXTRA_TOKENS_200>|",
3615
+ "|<EXTRA_TOKENS_201>|",
3616
+ "|<EXTRA_TOKENS_202>|",
3617
+ "|<EXTRA_TOKENS_203>|",
3618
+ "|<EXTRA_TOKENS_204>|",
3619
+ "|<EXTRA_TOKENS_205>|",
3620
+ "|<EXTRA_TOKENS_206>|",
3621
+ "|<EXTRA_TOKENS_207>|",
3622
+ "|<EXTRA_TOKENS_208>|",
3623
+ "|<EXTRA_TOKENS_209>|",
3624
+ "|<EXTRA_TOKENS_210>|",
3625
+ "|<EXTRA_TOKENS_211>|",
3626
+ "|<EXTRA_TOKENS_212>|",
3627
+ "|<EXTRA_TOKENS_213>|",
3628
+ "|<EXTRA_TOKENS_214>|",
3629
+ "|<EXTRA_TOKENS_215>|",
3630
+ "|<EXTRA_TOKENS_216>|",
3631
+ "|<EXTRA_TOKENS_217>|",
3632
+ "|<EXTRA_TOKENS_218>|",
3633
+ "|<EXTRA_TOKENS_219>|",
3634
+ "|<EXTRA_TOKENS_220>|",
3635
+ "|<EXTRA_TOKENS_221>|",
3636
+ "|<EXTRA_TOKENS_222>|",
3637
+ "|<EXTRA_TOKENS_223>|",
3638
+ "|<EXTRA_TOKENS_224>|",
3639
+ "|<EXTRA_TOKENS_225>|",
3640
+ "|<EXTRA_TOKENS_226>|",
3641
+ "|<EXTRA_TOKENS_227>|",
3642
+ "|<EXTRA_TOKENS_228>|",
3643
+ "|<EXTRA_TOKENS_229>|",
3644
+ "|<EXTRA_TOKENS_230>|",
3645
+ "|<EXTRA_TOKENS_231>|",
3646
+ "|<EXTRA_TOKENS_232>|",
3647
+ "|<EXTRA_TOKENS_233>|",
3648
+ "|<EXTRA_TOKENS_234>|",
3649
+ "|<EXTRA_TOKENS_235>|",
3650
+ "|<EXTRA_TOKENS_236>|",
3651
+ "|<EXTRA_TOKENS_237>|",
3652
+ "|<EXTRA_TOKENS_238>|",
3653
+ "|<EXTRA_TOKENS_239>|",
3654
+ "|<EXTRA_TOKENS_240>|",
3655
+ "|<EXTRA_TOKENS_241>|",
3656
+ "|<EXTRA_TOKENS_242>|",
3657
+ "|<EXTRA_TOKENS_243>|",
3658
+ "|<EXTRA_TOKENS_244>|",
3659
+ "|<EXTRA_TOKENS_245>|",
3660
+ "|<EXTRA_TOKENS_246>|",
3661
+ "|<EXTRA_TOKENS_247>|",
3662
+ "|<EXTRA_TOKENS_248>|",
3663
+ "|<EXTRA_TOKENS_249>|",
3664
+ "|<EXTRA_TOKENS_250>|",
3665
+ "|<EXTRA_TOKENS_251>|",
3666
+ "|<EXTRA_TOKENS_252>|",
3667
+ "|<EXTRA_TOKENS_253>|",
3668
+ "|<EXTRA_TOKENS_254>|",
3669
+ "|<EXTRA_TOKENS_255>|",
3670
+ "|<EXTRA_TOKENS_256>|",
3671
+ "|<EXTRA_TOKENS_257>|",
3672
+ "|<EXTRA_TOKENS_258>|",
3673
+ "|<EXTRA_TOKENS_259>|",
3674
+ "|<EXTRA_TOKENS_260>|",
3675
+ "|<EXTRA_TOKENS_261>|",
3676
+ "|<EXTRA_TOKENS_262>|",
3677
+ "|<EXTRA_TOKENS_263>|",
3678
+ "|<EXTRA_TOKENS_264>|",
3679
+ "|<EXTRA_TOKENS_265>|",
3680
+ "|<EXTRA_TOKENS_266>|",
3681
+ "|<EXTRA_TOKENS_267>|",
3682
+ "|<EXTRA_TOKENS_268>|",
3683
+ "|<EXTRA_TOKENS_269>|",
3684
+ "|<EXTRA_TOKENS_270>|",
3685
+ "|<EXTRA_TOKENS_271>|",
3686
+ "|<EXTRA_TOKENS_272>|",
3687
+ "|<EXTRA_TOKENS_273>|",
3688
+ "|<EXTRA_TOKENS_274>|",
3689
+ "|<EXTRA_TOKENS_275>|",
3690
+ "|<EXTRA_TOKENS_276>|",
3691
+ "|<EXTRA_TOKENS_277>|",
3692
+ "|<EXTRA_TOKENS_278>|",
3693
+ "|<EXTRA_TOKENS_279>|",
3694
+ "|<EXTRA_TOKENS_280>|",
3695
+ "|<EXTRA_TOKENS_281>|",
3696
+ "|<EXTRA_TOKENS_282>|",
3697
+ "|<EXTRA_TOKENS_283>|",
3698
+ "|<EXTRA_TOKENS_284>|",
3699
+ "|<EXTRA_TOKENS_285>|",
3700
+ "|<EXTRA_TOKENS_286>|",
3701
+ "|<EXTRA_TOKENS_287>|",
3702
+ "|<EXTRA_TOKENS_288>|",
3703
+ "|<EXTRA_TOKENS_289>|",
3704
+ "|<EXTRA_TOKENS_290>|",
3705
+ "|<EXTRA_TOKENS_291>|",
3706
+ "|<EXTRA_TOKENS_292>|",
3707
+ "|<EXTRA_TOKENS_293>|",
3708
+ "|<EXTRA_TOKENS_294>|",
3709
+ "|<EXTRA_TOKENS_295>|",
3710
+ "|<EXTRA_TOKENS_296>|",
3711
+ "|<EXTRA_TOKENS_297>|",
3712
+ "|<EXTRA_TOKENS_298>|",
3713
+ "|<EXTRA_TOKENS_299>|",
3714
+ "|<EXTRA_TOKENS_300>|",
3715
+ "|<EXTRA_TOKENS_301>|",
3716
+ "|<EXTRA_TOKENS_302>|",
3717
+ "|<EXTRA_TOKENS_303>|",
3718
+ "|<EXTRA_TOKENS_304>|",
3719
+ "|<EXTRA_TOKENS_305>|",
3720
+ "|<EXTRA_TOKENS_306>|",
3721
+ "|<EXTRA_TOKENS_307>|",
3722
+ "|<EXTRA_TOKENS_308>|",
3723
+ "|<EXTRA_TOKENS_309>|",
3724
+ "|<EXTRA_TOKENS_310>|",
3725
+ "|<EXTRA_TOKENS_311>|",
3726
+ "|<EXTRA_TOKENS_312>|",
3727
+ "|<EXTRA_TOKENS_313>|",
3728
+ "|<EXTRA_TOKENS_314>|",
3729
+ "|<EXTRA_TOKENS_315>|",
3730
+ "|<EXTRA_TOKENS_316>|",
3731
+ "|<EXTRA_TOKENS_317>|",
3732
+ "|<EXTRA_TOKENS_318>|",
3733
+ "|<EXTRA_TOKENS_319>|",
3734
+ "|<EXTRA_TOKENS_320>|",
3735
+ "|<EXTRA_TOKENS_321>|",
3736
+ "|<EXTRA_TOKENS_322>|",
3737
+ "|<EXTRA_TOKENS_323>|",
3738
+ "|<EXTRA_TOKENS_324>|",
3739
+ "|<EXTRA_TOKENS_325>|",
3740
+ "|<EXTRA_TOKENS_326>|",
3741
+ "|<EXTRA_TOKENS_327>|",
3742
+ "|<EXTRA_TOKENS_328>|",
3743
+ "|<EXTRA_TOKENS_329>|",
3744
+ "|<EXTRA_TOKENS_330>|",
3745
+ "|<EXTRA_TOKENS_331>|",
3746
+ "|<EXTRA_TOKENS_332>|",
3747
+ "|<EXTRA_TOKENS_333>|",
3748
+ "|<EXTRA_TOKENS_334>|",
3749
+ "|<EXTRA_TOKENS_335>|",
3750
+ "|<EXTRA_TOKENS_336>|",
3751
+ "|<EXTRA_TOKENS_337>|",
3752
+ "|<EXTRA_TOKENS_338>|",
3753
+ "|<EXTRA_TOKENS_339>|",
3754
+ "|<EXTRA_TOKENS_340>|",
3755
+ "|<EXTRA_TOKENS_341>|",
3756
+ "|<EXTRA_TOKENS_342>|",
3757
+ "|<EXTRA_TOKENS_343>|",
3758
+ "|<EXTRA_TOKENS_344>|",
3759
+ "|<EXTRA_TOKENS_345>|",
3760
+ "|<EXTRA_TOKENS_346>|",
3761
+ "|<EXTRA_TOKENS_347>|",
3762
+ "|<EXTRA_TOKENS_348>|",
3763
+ "|<EXTRA_TOKENS_349>|",
3764
+ "|<EXTRA_TOKENS_350>|",
3765
+ "|<EXTRA_TOKENS_351>|",
3766
+ "|<EXTRA_TOKENS_352>|",
3767
+ "|<EXTRA_TOKENS_353>|",
3768
+ "|<EXTRA_TOKENS_354>|",
3769
+ "|<EXTRA_TOKENS_355>|",
3770
+ "|<EXTRA_TOKENS_356>|",
3771
+ "|<EXTRA_TOKENS_357>|",
3772
+ "|<EXTRA_TOKENS_358>|",
3773
+ "|<EXTRA_TOKENS_359>|",
3774
+ "|<EXTRA_TOKENS_360>|",
3775
+ "|<EXTRA_TOKENS_361>|",
3776
+ "|<EXTRA_TOKENS_362>|",
3777
+ "|<EXTRA_TOKENS_363>|",
3778
+ "|<EXTRA_TOKENS_364>|",
3779
+ "|<EXTRA_TOKENS_365>|",
3780
+ "|<EXTRA_TOKENS_366>|",
3781
+ "|<EXTRA_TOKENS_367>|",
3782
+ "|<EXTRA_TOKENS_368>|",
3783
+ "|<EXTRA_TOKENS_369>|",
3784
+ "|<EXTRA_TOKENS_370>|",
3785
+ "|<EXTRA_TOKENS_371>|",
3786
+ "|<EXTRA_TOKENS_372>|",
3787
+ "|<EXTRA_TOKENS_373>|",
3788
+ "|<EXTRA_TOKENS_374>|",
3789
+ "|<EXTRA_TOKENS_375>|",
3790
+ "|<EXTRA_TOKENS_376>|",
3791
+ "|<EXTRA_TOKENS_377>|",
3792
+ "|<EXTRA_TOKENS_378>|",
3793
+ "|<EXTRA_TOKENS_379>|",
3794
+ "|<EXTRA_TOKENS_380>|",
3795
+ "|<EXTRA_TOKENS_381>|",
3796
+ "|<EXTRA_TOKENS_382>|",
3797
+ "|<EXTRA_TOKENS_383>|",
3798
+ "|<EXTRA_TOKENS_384>|",
3799
+ "|<EXTRA_TOKENS_385>|",
3800
+ "|<EXTRA_TOKENS_386>|",
3801
+ "|<EXTRA_TOKENS_387>|",
3802
+ "|<EXTRA_TOKENS_388>|",
3803
+ "|<EXTRA_TOKENS_389>|",
3804
+ "|<EXTRA_TOKENS_390>|",
3805
+ "|<EXTRA_TOKENS_391>|",
3806
+ "|<EXTRA_TOKENS_392>|",
3807
+ "|<EXTRA_TOKENS_393>|",
3808
+ "|<EXTRA_TOKENS_394>|",
3809
+ "|<EXTRA_TOKENS_395>|",
3810
+ "|<EXTRA_TOKENS_396>|",
3811
+ "|<EXTRA_TOKENS_397>|",
3812
+ "|<EXTRA_TOKENS_398>|",
3813
+ "|<EXTRA_TOKENS_399>|",
3814
+ "|<EXTRA_TOKENS_400>|",
3815
+ "|<EXTRA_TOKENS_401>|",
3816
+ "|<EXTRA_TOKENS_402>|",
3817
+ "|<EXTRA_TOKENS_403>|",
3818
+ "|<EXTRA_TOKENS_404>|",
3819
+ "|<EXTRA_TOKENS_405>|",
3820
+ "|<EXTRA_TOKENS_406>|",
3821
+ "|<EXTRA_TOKENS_407>|",
3822
+ "|<EXTRA_TOKENS_408>|",
3823
+ "|<EXTRA_TOKENS_409>|",
3824
+ "|<EXTRA_TOKENS_410>|",
3825
+ "|<EXTRA_TOKENS_411>|",
3826
+ "|<EXTRA_TOKENS_412>|",
3827
+ "|<EXTRA_TOKENS_413>|",
3828
+ "|<EXTRA_TOKENS_414>|",
3829
+ "|<EXTRA_TOKENS_415>|",
3830
+ "|<EXTRA_TOKENS_416>|",
3831
+ "|<EXTRA_TOKENS_417>|",
3832
+ "<im_start>",
3833
+ "<im_end>",
3834
+ "<im_patch>",
3835
+ "<im_col>",
3836
+ "<|image|>"
3837
+ ],
3838
+ "auto_map": {
3839
+ "AutoProcessor": "preprocessing_molmo.MolmoProcessor"
3840
+ },
3841
+ "bos_token": null,
3842
+ "chat_template": "{% for message in messages -%}\n {%- if (loop.index % 2 == 1 and message['role'] != 'user') or \n (loop.index % 2 == 0 and message['role'].lower() != 'assistant') -%}\n {{ raise_exception('Conversation roles must alternate user/assistant/user/assistant/...') }}\n {%- endif -%}\n {{ message['role'].capitalize() + ': ' + message['content'] }}\n {%- if not loop.last -%}\n {{ ' ' }}\n {%- endif %}\n {%- endfor -%}\n {%- if add_generation_prompt -%}\n {{ ' Assistant:' }}\n {%- endif %}",
3843
+ "clean_up_tokenization_spaces": false,
3844
+ "eos_token": "<|endoftext|>",
3845
+ "errors": "replace",
3846
+ "model_max_length": 32768,
3847
+ "pad_token": "<|endoftext|>",
3848
+ "processor_class": "MolmoProcessor",
3849
+ "split_special_tokens": false,
3850
+ "tokenizer_class": "Qwen2Tokenizer",
3851
+ "unk_token": null
3852
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff