Andyson committed on
Commit
0447610
1 Parent(s): 46062ae

huggingface hub

Browse files
app.py CHANGED
@@ -73,7 +73,7 @@ class Arguments:
73
  default='configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml',
74
  metadata={"help": "config path of sd adapter"})
75
  agent: Optional[str] = field(default='configs/clm_models/agent_7b_sft.yaml',
76
- metadata={"help": "config path of agent model"})
77
  diffusion_path: Optional[str] = field(default='stabilityai/stable-diffusion-xl-base-1.0',
78
  metadata={"help": "diffusion model path"})
79
  port: Optional[str] = field(default=80, metadata={"help": "network port"})
 
73
  default='configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml',
74
  metadata={"help": "config path of sd adapter"})
75
  agent: Optional[str] = field(default='configs/clm_models/agent_7b_sft.yaml',
76
+ metadata={"help": "Hugging Face model path of agent model"})
77
  diffusion_path: Optional[str] = field(default='stabilityai/stable-diffusion-xl-base-1.0',
78
  metadata={"help": "diffusion model path"})
79
  port: Optional[str] = field(default=80, metadata={"help": "network port"})
configs/clm_models/agent_7b_sft.yaml CHANGED
@@ -15,4 +15,4 @@ output_resampler:
15
 
16
  lm_loss_scale: 1.0
17
  rec_loss_scale: 1.0
18
- pretrained_model_path: pretrained/seed_story/george_sft/pytorch_model.bin
 
15
 
16
  lm_loss_scale: 1.0
17
  rec_loss_scale: 1.0
18
+ pretrained_model_path: TencentARC/SEED-Story
configs/detokenizer/detokenizer_sdxl_qwen_vit_adapted.yaml CHANGED
@@ -12,4 +12,5 @@ resampler:
12
  output2_dim: 1280
13
  ff_mult: 4
14
 
15
- pretrained_model_path: pretrained/detokenizer/detokenizer_george_adapted/checkpoint-4000/pytorch_model.bin
 
 
12
  output2_dim: 1280
13
  ff_mult: 4
14
 
15
+ pretrained_model_path: TencentARC/SEED-Story
16
+ subfolder: detokenizer/detokenizer_george_adapted/checkpoint-4000
configs/tokenizer/clm_llama_tokenizer.yaml CHANGED
@@ -1,2 +1,3 @@
1
  _target_: transformers.LlamaTokenizer.from_pretrained
2
- pretrained_model_name_or_path: pretrained/cvlm_llama2_tokenizer
 
 
1
  _target_: transformers.LlamaTokenizer.from_pretrained
2
+ pretrained_model_name_or_path: TencentARC/SEED-Story
3
+ subfolder: cvlm_llama2_tokenizer
pretrained/cvlm_llama2_tokenizer/added_tokens.json DELETED
@@ -1,68 +0,0 @@
1
- {
2
- "</img>": 32065,
3
- "<img>": 32064,
4
- "<img_00000>": 32000,
5
- "<img_00001>": 32001,
6
- "<img_00002>": 32002,
7
- "<img_00003>": 32003,
8
- "<img_00004>": 32004,
9
- "<img_00005>": 32005,
10
- "<img_00006>": 32006,
11
- "<img_00007>": 32007,
12
- "<img_00008>": 32008,
13
- "<img_00009>": 32009,
14
- "<img_00010>": 32010,
15
- "<img_00011>": 32011,
16
- "<img_00012>": 32012,
17
- "<img_00013>": 32013,
18
- "<img_00014>": 32014,
19
- "<img_00015>": 32015,
20
- "<img_00016>": 32016,
21
- "<img_00017>": 32017,
22
- "<img_00018>": 32018,
23
- "<img_00019>": 32019,
24
- "<img_00020>": 32020,
25
- "<img_00021>": 32021,
26
- "<img_00022>": 32022,
27
- "<img_00023>": 32023,
28
- "<img_00024>": 32024,
29
- "<img_00025>": 32025,
30
- "<img_00026>": 32026,
31
- "<img_00027>": 32027,
32
- "<img_00028>": 32028,
33
- "<img_00029>": 32029,
34
- "<img_00030>": 32030,
35
- "<img_00031>": 32031,
36
- "<img_00032>": 32032,
37
- "<img_00033>": 32033,
38
- "<img_00034>": 32034,
39
- "<img_00035>": 32035,
40
- "<img_00036>": 32036,
41
- "<img_00037>": 32037,
42
- "<img_00038>": 32038,
43
- "<img_00039>": 32039,
44
- "<img_00040>": 32040,
45
- "<img_00041>": 32041,
46
- "<img_00042>": 32042,
47
- "<img_00043>": 32043,
48
- "<img_00044>": 32044,
49
- "<img_00045>": 32045,
50
- "<img_00046>": 32046,
51
- "<img_00047>": 32047,
52
- "<img_00048>": 32048,
53
- "<img_00049>": 32049,
54
- "<img_00050>": 32050,
55
- "<img_00051>": 32051,
56
- "<img_00052>": 32052,
57
- "<img_00053>": 32053,
58
- "<img_00054>": 32054,
59
- "<img_00055>": 32055,
60
- "<img_00056>": 32056,
61
- "<img_00057>": 32057,
62
- "<img_00058>": 32058,
63
- "<img_00059>": 32059,
64
- "<img_00060>": 32060,
65
- "<img_00061>": 32061,
66
- "<img_00062>": 32062,
67
- "<img_00063>": 32063
68
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pretrained/cvlm_llama2_tokenizer/special_tokens_map.json DELETED
@@ -1,40 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<img>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- },
10
- {
11
- "content": "</img>",
12
- "lstrip": false,
13
- "normalized": false,
14
- "rstrip": false,
15
- "single_word": false
16
- }
17
- ],
18
- "bos_token": {
19
- "content": "<s>",
20
- "lstrip": false,
21
- "normalized": false,
22
- "rstrip": false,
23
- "single_word": false
24
- },
25
- "eos_token": {
26
- "content": "</s>",
27
- "lstrip": false,
28
- "normalized": false,
29
- "rstrip": false,
30
- "single_word": false
31
- },
32
- "pad_token": "<unk>",
33
- "unk_token": {
34
- "content": "<unk>",
35
- "lstrip": false,
36
- "normalized": false,
37
- "rstrip": false,
38
- "single_word": false
39
- }
40
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pretrained/cvlm_llama2_tokenizer/tokenizer.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:9e556afd44213b6bd1be2b850ebbbd98f5481437a8021afaf58ee7fb1818d347
3
- size 499723
 
 
 
 
pretrained/cvlm_llama2_tokenizer/tokenizer_config.json DELETED
@@ -1,573 +0,0 @@
1
- {
2
- "add_bos_token": true,
3
- "add_eos_token": false,
4
- "added_tokens_decoder": {
5
- "0": {
6
- "content": "<unk>",
7
- "lstrip": false,
8
- "normalized": false,
9
- "rstrip": false,
10
- "single_word": false,
11
- "special": true
12
- },
13
- "1": {
14
- "content": "<s>",
15
- "lstrip": false,
16
- "normalized": false,
17
- "rstrip": false,
18
- "single_word": false,
19
- "special": true
20
- },
21
- "2": {
22
- "content": "</s>",
23
- "lstrip": false,
24
- "normalized": false,
25
- "rstrip": false,
26
- "single_word": false,
27
- "special": true
28
- },
29
- "32000": {
30
- "content": "<img_00000>",
31
- "lstrip": false,
32
- "normalized": true,
33
- "rstrip": false,
34
- "single_word": false,
35
- "special": false
36
- },
37
- "32001": {
38
- "content": "<img_00001>",
39
- "lstrip": false,
40
- "normalized": true,
41
- "rstrip": false,
42
- "single_word": false,
43
- "special": false
44
- },
45
- "32002": {
46
- "content": "<img_00002>",
47
- "lstrip": false,
48
- "normalized": true,
49
- "rstrip": false,
50
- "single_word": false,
51
- "special": false
52
- },
53
- "32003": {
54
- "content": "<img_00003>",
55
- "lstrip": false,
56
- "normalized": true,
57
- "rstrip": false,
58
- "single_word": false,
59
- "special": false
60
- },
61
- "32004": {
62
- "content": "<img_00004>",
63
- "lstrip": false,
64
- "normalized": true,
65
- "rstrip": false,
66
- "single_word": false,
67
- "special": false
68
- },
69
- "32005": {
70
- "content": "<img_00005>",
71
- "lstrip": false,
72
- "normalized": true,
73
- "rstrip": false,
74
- "single_word": false,
75
- "special": false
76
- },
77
- "32006": {
78
- "content": "<img_00006>",
79
- "lstrip": false,
80
- "normalized": true,
81
- "rstrip": false,
82
- "single_word": false,
83
- "special": false
84
- },
85
- "32007": {
86
- "content": "<img_00007>",
87
- "lstrip": false,
88
- "normalized": true,
89
- "rstrip": false,
90
- "single_word": false,
91
- "special": false
92
- },
93
- "32008": {
94
- "content": "<img_00008>",
95
- "lstrip": false,
96
- "normalized": true,
97
- "rstrip": false,
98
- "single_word": false,
99
- "special": false
100
- },
101
- "32009": {
102
- "content": "<img_00009>",
103
- "lstrip": false,
104
- "normalized": true,
105
- "rstrip": false,
106
- "single_word": false,
107
- "special": false
108
- },
109
- "32010": {
110
- "content": "<img_00010>",
111
- "lstrip": false,
112
- "normalized": true,
113
- "rstrip": false,
114
- "single_word": false,
115
- "special": false
116
- },
117
- "32011": {
118
- "content": "<img_00011>",
119
- "lstrip": false,
120
- "normalized": true,
121
- "rstrip": false,
122
- "single_word": false,
123
- "special": false
124
- },
125
- "32012": {
126
- "content": "<img_00012>",
127
- "lstrip": false,
128
- "normalized": true,
129
- "rstrip": false,
130
- "single_word": false,
131
- "special": false
132
- },
133
- "32013": {
134
- "content": "<img_00013>",
135
- "lstrip": false,
136
- "normalized": true,
137
- "rstrip": false,
138
- "single_word": false,
139
- "special": false
140
- },
141
- "32014": {
142
- "content": "<img_00014>",
143
- "lstrip": false,
144
- "normalized": true,
145
- "rstrip": false,
146
- "single_word": false,
147
- "special": false
148
- },
149
- "32015": {
150
- "content": "<img_00015>",
151
- "lstrip": false,
152
- "normalized": true,
153
- "rstrip": false,
154
- "single_word": false,
155
- "special": false
156
- },
157
- "32016": {
158
- "content": "<img_00016>",
159
- "lstrip": false,
160
- "normalized": true,
161
- "rstrip": false,
162
- "single_word": false,
163
- "special": false
164
- },
165
- "32017": {
166
- "content": "<img_00017>",
167
- "lstrip": false,
168
- "normalized": true,
169
- "rstrip": false,
170
- "single_word": false,
171
- "special": false
172
- },
173
- "32018": {
174
- "content": "<img_00018>",
175
- "lstrip": false,
176
- "normalized": true,
177
- "rstrip": false,
178
- "single_word": false,
179
- "special": false
180
- },
181
- "32019": {
182
- "content": "<img_00019>",
183
- "lstrip": false,
184
- "normalized": true,
185
- "rstrip": false,
186
- "single_word": false,
187
- "special": false
188
- },
189
- "32020": {
190
- "content": "<img_00020>",
191
- "lstrip": false,
192
- "normalized": true,
193
- "rstrip": false,
194
- "single_word": false,
195
- "special": false
196
- },
197
- "32021": {
198
- "content": "<img_00021>",
199
- "lstrip": false,
200
- "normalized": true,
201
- "rstrip": false,
202
- "single_word": false,
203
- "special": false
204
- },
205
- "32022": {
206
- "content": "<img_00022>",
207
- "lstrip": false,
208
- "normalized": true,
209
- "rstrip": false,
210
- "single_word": false,
211
- "special": false
212
- },
213
- "32023": {
214
- "content": "<img_00023>",
215
- "lstrip": false,
216
- "normalized": true,
217
- "rstrip": false,
218
- "single_word": false,
219
- "special": false
220
- },
221
- "32024": {
222
- "content": "<img_00024>",
223
- "lstrip": false,
224
- "normalized": true,
225
- "rstrip": false,
226
- "single_word": false,
227
- "special": false
228
- },
229
- "32025": {
230
- "content": "<img_00025>",
231
- "lstrip": false,
232
- "normalized": true,
233
- "rstrip": false,
234
- "single_word": false,
235
- "special": false
236
- },
237
- "32026": {
238
- "content": "<img_00026>",
239
- "lstrip": false,
240
- "normalized": true,
241
- "rstrip": false,
242
- "single_word": false,
243
- "special": false
244
- },
245
- "32027": {
246
- "content": "<img_00027>",
247
- "lstrip": false,
248
- "normalized": true,
249
- "rstrip": false,
250
- "single_word": false,
251
- "special": false
252
- },
253
- "32028": {
254
- "content": "<img_00028>",
255
- "lstrip": false,
256
- "normalized": true,
257
- "rstrip": false,
258
- "single_word": false,
259
- "special": false
260
- },
261
- "32029": {
262
- "content": "<img_00029>",
263
- "lstrip": false,
264
- "normalized": true,
265
- "rstrip": false,
266
- "single_word": false,
267
- "special": false
268
- },
269
- "32030": {
270
- "content": "<img_00030>",
271
- "lstrip": false,
272
- "normalized": true,
273
- "rstrip": false,
274
- "single_word": false,
275
- "special": false
276
- },
277
- "32031": {
278
- "content": "<img_00031>",
279
- "lstrip": false,
280
- "normalized": true,
281
- "rstrip": false,
282
- "single_word": false,
283
- "special": false
284
- },
285
- "32032": {
286
- "content": "<img_00032>",
287
- "lstrip": false,
288
- "normalized": true,
289
- "rstrip": false,
290
- "single_word": false,
291
- "special": false
292
- },
293
- "32033": {
294
- "content": "<img_00033>",
295
- "lstrip": false,
296
- "normalized": true,
297
- "rstrip": false,
298
- "single_word": false,
299
- "special": false
300
- },
301
- "32034": {
302
- "content": "<img_00034>",
303
- "lstrip": false,
304
- "normalized": true,
305
- "rstrip": false,
306
- "single_word": false,
307
- "special": false
308
- },
309
- "32035": {
310
- "content": "<img_00035>",
311
- "lstrip": false,
312
- "normalized": true,
313
- "rstrip": false,
314
- "single_word": false,
315
- "special": false
316
- },
317
- "32036": {
318
- "content": "<img_00036>",
319
- "lstrip": false,
320
- "normalized": true,
321
- "rstrip": false,
322
- "single_word": false,
323
- "special": false
324
- },
325
- "32037": {
326
- "content": "<img_00037>",
327
- "lstrip": false,
328
- "normalized": true,
329
- "rstrip": false,
330
- "single_word": false,
331
- "special": false
332
- },
333
- "32038": {
334
- "content": "<img_00038>",
335
- "lstrip": false,
336
- "normalized": true,
337
- "rstrip": false,
338
- "single_word": false,
339
- "special": false
340
- },
341
- "32039": {
342
- "content": "<img_00039>",
343
- "lstrip": false,
344
- "normalized": true,
345
- "rstrip": false,
346
- "single_word": false,
347
- "special": false
348
- },
349
- "32040": {
350
- "content": "<img_00040>",
351
- "lstrip": false,
352
- "normalized": true,
353
- "rstrip": false,
354
- "single_word": false,
355
- "special": false
356
- },
357
- "32041": {
358
- "content": "<img_00041>",
359
- "lstrip": false,
360
- "normalized": true,
361
- "rstrip": false,
362
- "single_word": false,
363
- "special": false
364
- },
365
- "32042": {
366
- "content": "<img_00042>",
367
- "lstrip": false,
368
- "normalized": true,
369
- "rstrip": false,
370
- "single_word": false,
371
- "special": false
372
- },
373
- "32043": {
374
- "content": "<img_00043>",
375
- "lstrip": false,
376
- "normalized": true,
377
- "rstrip": false,
378
- "single_word": false,
379
- "special": false
380
- },
381
- "32044": {
382
- "content": "<img_00044>",
383
- "lstrip": false,
384
- "normalized": true,
385
- "rstrip": false,
386
- "single_word": false,
387
- "special": false
388
- },
389
- "32045": {
390
- "content": "<img_00045>",
391
- "lstrip": false,
392
- "normalized": true,
393
- "rstrip": false,
394
- "single_word": false,
395
- "special": false
396
- },
397
- "32046": {
398
- "content": "<img_00046>",
399
- "lstrip": false,
400
- "normalized": true,
401
- "rstrip": false,
402
- "single_word": false,
403
- "special": false
404
- },
405
- "32047": {
406
- "content": "<img_00047>",
407
- "lstrip": false,
408
- "normalized": true,
409
- "rstrip": false,
410
- "single_word": false,
411
- "special": false
412
- },
413
- "32048": {
414
- "content": "<img_00048>",
415
- "lstrip": false,
416
- "normalized": true,
417
- "rstrip": false,
418
- "single_word": false,
419
- "special": false
420
- },
421
- "32049": {
422
- "content": "<img_00049>",
423
- "lstrip": false,
424
- "normalized": true,
425
- "rstrip": false,
426
- "single_word": false,
427
- "special": false
428
- },
429
- "32050": {
430
- "content": "<img_00050>",
431
- "lstrip": false,
432
- "normalized": true,
433
- "rstrip": false,
434
- "single_word": false,
435
- "special": false
436
- },
437
- "32051": {
438
- "content": "<img_00051>",
439
- "lstrip": false,
440
- "normalized": true,
441
- "rstrip": false,
442
- "single_word": false,
443
- "special": false
444
- },
445
- "32052": {
446
- "content": "<img_00052>",
447
- "lstrip": false,
448
- "normalized": true,
449
- "rstrip": false,
450
- "single_word": false,
451
- "special": false
452
- },
453
- "32053": {
454
- "content": "<img_00053>",
455
- "lstrip": false,
456
- "normalized": true,
457
- "rstrip": false,
458
- "single_word": false,
459
- "special": false
460
- },
461
- "32054": {
462
- "content": "<img_00054>",
463
- "lstrip": false,
464
- "normalized": true,
465
- "rstrip": false,
466
- "single_word": false,
467
- "special": false
468
- },
469
- "32055": {
470
- "content": "<img_00055>",
471
- "lstrip": false,
472
- "normalized": true,
473
- "rstrip": false,
474
- "single_word": false,
475
- "special": false
476
- },
477
- "32056": {
478
- "content": "<img_00056>",
479
- "lstrip": false,
480
- "normalized": true,
481
- "rstrip": false,
482
- "single_word": false,
483
- "special": false
484
- },
485
- "32057": {
486
- "content": "<img_00057>",
487
- "lstrip": false,
488
- "normalized": true,
489
- "rstrip": false,
490
- "single_word": false,
491
- "special": false
492
- },
493
- "32058": {
494
- "content": "<img_00058>",
495
- "lstrip": false,
496
- "normalized": true,
497
- "rstrip": false,
498
- "single_word": false,
499
- "special": false
500
- },
501
- "32059": {
502
- "content": "<img_00059>",
503
- "lstrip": false,
504
- "normalized": true,
505
- "rstrip": false,
506
- "single_word": false,
507
- "special": false
508
- },
509
- "32060": {
510
- "content": "<img_00060>",
511
- "lstrip": false,
512
- "normalized": true,
513
- "rstrip": false,
514
- "single_word": false,
515
- "special": false
516
- },
517
- "32061": {
518
- "content": "<img_00061>",
519
- "lstrip": false,
520
- "normalized": true,
521
- "rstrip": false,
522
- "single_word": false,
523
- "special": false
524
- },
525
- "32062": {
526
- "content": "<img_00062>",
527
- "lstrip": false,
528
- "normalized": true,
529
- "rstrip": false,
530
- "single_word": false,
531
- "special": false
532
- },
533
- "32063": {
534
- "content": "<img_00063>",
535
- "lstrip": false,
536
- "normalized": true,
537
- "rstrip": false,
538
- "single_word": false,
539
- "special": false
540
- },
541
- "32064": {
542
- "content": "<img>",
543
- "lstrip": false,
544
- "normalized": false,
545
- "rstrip": false,
546
- "single_word": false,
547
- "special": true
548
- },
549
- "32065": {
550
- "content": "</img>",
551
- "lstrip": false,
552
- "normalized": false,
553
- "rstrip": false,
554
- "single_word": false,
555
- "special": true
556
- }
557
- },
558
- "additional_special_tokens": [
559
- "<img>",
560
- "</img>"
561
- ],
562
- "bos_token": "<s>",
563
- "clean_up_tokenization_spaces": false,
564
- "eos_token": "</s>",
565
- "legacy": false,
566
- "model_max_length": 1000000000000000019884624838656,
567
- "pad_token": "<unk>",
568
- "sp_model_kwargs": {},
569
- "spaces_between_special_tokens": false,
570
- "tokenizer_class": "LlamaTokenizer",
571
- "unk_token": "<unk>",
572
- "use_default_system_prompt": false
573
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pretrained/detokenizer/detokenizer_george_adapted/checkpoint-4000/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:785d4e670ccfdce33b493d0aada60ee5c116918468098b2ed82ae2c28f31e423
3
- size 6471628187
 
 
 
 
pretrained/seed_story/george_sft/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c7e46794a2aab38f3f59484a4f4bb4c839217ef17c4329977b0a11839f462b94
3
- size 14709979626
 
 
 
 
src/models/qwen_visual.py CHANGED
@@ -411,13 +411,21 @@ class VisionTransformerWithAttnPool(nn.Module):
411
  return self(images)
412
 
413
  @classmethod
414
- def from_pretrained(cls, pretrained_model_path=None, **kawrgs):
415
- model = cls(**kawrgs)
 
416
  if pretrained_model_path is not None:
417
- ckpt = torch.load(pretrained_model_path, map_location='cpu')
418
- missing, unexpected = model.load_state_dict(ckpt, strict=False)
419
- print('Load ckpt of qwen visual encoder')
420
- print('missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
 
 
 
 
 
 
 
421
 
422
  return model
423
 
 
411
  return self(images)
412
 
413
  @classmethod
414
+ def from_pretrained(cls, pretrained_model_path=None, subfolder=None, **kwargs):
415
+ model = cls(**kwargs)
416
+
417
  if pretrained_model_path is not None:
418
+ # Load model from Hugging Face Hub with subfolder specification
419
+ if 'TencentARC/SEED-Story' in pretrained_model_path:
420
+ # Use `subfolder` to specify the location within the repository
421
+ ckpt = AutoModel.from_pretrained(pretrained_model_path, subfolder=subfolder)
422
+ missing, unexpected = model.load_state_dict(ckpt.state_dict(), strict=False)
423
+ print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
424
+ else:
425
+ # For local path loading
426
+ ckpt = torch.load(pretrained_model_path, map_location='cpu')
427
+ missing, unexpected = model.load_state_dict(ckpt, strict=False)
428
+ print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
429
 
430
  return model
431
 
src/models_clm/models.py CHANGED
@@ -2,6 +2,7 @@ import torch
2
  import torch.nn as nn
3
  from transformers import LlamaForCausalLM, LlamaConfig
4
  from transformers import LogitsProcessor, LogitsProcessorList
 
5
  from .generation import AutoImageTokenGenerationProcessor
6
  import torch.nn.functional as F
7
 
@@ -220,13 +221,24 @@ class ContinuousLVLM(nn.Module):
220
  'past_key_values': output_past_key_values
221
  }
222
 
 
223
  @classmethod
224
  def from_pretrained(cls, llm, input_resampler, output_resampler, pretrained_model_path=None, **kwargs):
225
  model = cls(llm=llm, input_resampler=input_resampler, output_resampler=output_resampler, **kwargs)
 
226
  if pretrained_model_path is not None:
227
- ckpt = torch.load(pretrained_model_path, map_location='cpu')
228
- missing, unexpected = model.load_state_dict(ckpt, strict=False)
229
- print('agent model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
 
 
 
 
 
 
 
 
 
230
  return model
231
 
232
 
 
2
  import torch.nn as nn
3
  from transformers import LlamaForCausalLM, LlamaConfig
4
  from transformers import LogitsProcessor, LogitsProcessorList
5
+ from transformers import AutoModel
6
  from .generation import AutoImageTokenGenerationProcessor
7
  import torch.nn.functional as F
8
 
 
221
  'past_key_values': output_past_key_values
222
  }
223
 
224
+
225
  @classmethod
226
  def from_pretrained(cls, llm, input_resampler, output_resampler, pretrained_model_path=None, **kwargs):
227
  model = cls(llm=llm, input_resampler=input_resampler, output_resampler=output_resampler, **kwargs)
228
+
229
  if pretrained_model_path is not None:
230
+ # Check if the path is intended for Hugging Face Hub
231
+ if 'TencentARC/SEED-Story' in pretrained_model_path:
232
+ # Load from a specific subfolder within the Hugging Face repository
233
+ ckpt = AutoModel.from_pretrained(pretrained_model_path, subfolder="seed_story/george_sft")
234
+ missing, unexpected = model.load_state_dict(ckpt.state_dict(), strict=False)
235
+ print('Agent model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
236
+ else:
237
+ # For local path loading
238
+ ckpt = torch.load(pretrained_model_path, map_location='cpu')
239
+ missing, unexpected = model.load_state_dict(ckpt, strict=False)
240
+ print('Agent model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
241
+
242
  return model
243
 
244
 
src/models_ipa/adapter_modules.py CHANGED
@@ -20,6 +20,7 @@ else:
20
  from diffusers.loaders import LoraLoaderMixin
21
  from diffusers.models.lora import LoRALinearLayer
22
  from diffusers.models.unet_2d_blocks import DownBlock2D
 
23
 
24
 
25
  # from .pipeline_stable_diffusion_xl_t2i_edit import StableDiffusionXLText2ImageAndEditPipeline
@@ -348,12 +349,22 @@ class SDXLAdapter(nn.Module):
348
  return image_embeds, pooled_image_embeds
349
 
350
  @classmethod
351
- def from_pretrained(cls, unet, resampler, pretrained_model_path=None, **kwargs):
352
- model = cls(unet=unet, resampler=resampler, **kwargs)
 
353
  if pretrained_model_path is not None:
354
- ckpt = torch.load(pretrained_model_path, map_location='cpu')
355
- missing, unexpected = model.load_state_dict(ckpt, strict=False)
356
- print('missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
 
 
 
 
 
 
 
 
 
357
  return model
358
 
359
  def init_pipe(self,
 
20
  from diffusers.loaders import LoraLoaderMixin
21
  from diffusers.models.lora import LoRALinearLayer
22
  from diffusers.models.unet_2d_blocks import DownBlock2D
23
+ from transformers import AutoModel
24
 
25
 
26
  # from .pipeline_stable_diffusion_xl_t2i_edit import StableDiffusionXLText2ImageAndEditPipeline
 
349
  return image_embeds, pooled_image_embeds
350
 
351
  @classmethod
352
+ def from_pretrained(cls, pretrained_model_path=None, subfolder=None, **kwargs):
353
+ model = cls(**kwargs)
354
+
355
  if pretrained_model_path is not None:
356
+ # Load model from Hugging Face Hub with subfolder specification
357
+ if 'TencentARC/SEED-Story' in pretrained_model_path:
358
+ # Use `subfolder` to specify the location within the repository
359
+ ckpt = AutoModel.from_pretrained(pretrained_model_path, subfolder=subfolder)
360
+ missing, unexpected = model.load_state_dict(ckpt.state_dict(), strict=False)
361
+ print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
362
+ else:
363
+ # For local path loading
364
+ ckpt = torch.load(pretrained_model_path, map_location='cpu')
365
+ missing, unexpected = model.load_state_dict(ckpt, strict=False)
366
+ print('Detokenizer model, missing keys: ', len(missing), 'unexpected keys:', len(unexpected))
367
+
368
  return model
369
 
370
  def init_pipe(self,