jyoung105 committed on
Commit
1d5b2ff
·
verified ·
1 Parent(s): c77ba6f

Upload StableCascadeDecoderPipeline

Browse files
decoder/config.json ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "StableCascadeUNet",
3
+ "_diffusers_version": "0.30.2",
4
+ "_name_or_path": "/root/.cache/huggingface/hub/models--stabilityai--stable-cascade/snapshots/a89f66d459ae653e3b4d4f992a7c3789d0dc4d16/decoder",
5
+ "block_out_channels": [
6
+ 320,
7
+ 640,
8
+ 1280,
9
+ 1280
10
+ ],
11
+ "block_types_per_layer": [
12
+ [
13
+ "SDCascadeResBlock",
14
+ "SDCascadeTimestepBlock"
15
+ ],
16
+ [
17
+ "SDCascadeResBlock",
18
+ "SDCascadeTimestepBlock"
19
+ ],
20
+ [
21
+ "SDCascadeResBlock",
22
+ "SDCascadeTimestepBlock",
23
+ "SDCascadeAttnBlock"
24
+ ],
25
+ [
26
+ "SDCascadeResBlock",
27
+ "SDCascadeTimestepBlock",
28
+ "SDCascadeAttnBlock"
29
+ ]
30
+ ],
31
+ "clip_image_in_channels": null,
32
+ "clip_seq": 4,
33
+ "clip_text_in_channels": null,
34
+ "clip_text_pooled_in_channels": 1280,
35
+ "conditioning_dim": 1280,
36
+ "down_blocks_repeat_mappers": [
37
+ 1,
38
+ 1,
39
+ 1,
40
+ 1
41
+ ],
42
+ "down_num_layers_per_block": [
43
+ 2,
44
+ 6,
45
+ 28,
46
+ 6
47
+ ],
48
+ "dropout": [
49
+ 0,
50
+ 0,
51
+ 0.1,
52
+ 0.1
53
+ ],
54
+ "effnet_in_channels": 16,
55
+ "in_channels": 4,
56
+ "kernel_size": 3,
57
+ "num_attention_heads": [
58
+ 0,
59
+ 0,
60
+ 20,
61
+ 20
62
+ ],
63
+ "out_channels": 4,
64
+ "patch_size": 2,
65
+ "pixel_mapper_in_channels": 3,
66
+ "self_attn": true,
67
+ "switch_level": null,
68
+ "timestep_conditioning_type": [
69
+ "sca"
70
+ ],
71
+ "timestep_ratio_embedding_dim": 64,
72
+ "up_blocks_repeat_mappers": [
73
+ 3,
74
+ 3,
75
+ 2,
76
+ 2
77
+ ],
78
+ "up_num_layers_per_block": [
79
+ 6,
80
+ 28,
81
+ 6,
82
+ 2
83
+ ]
84
+ }
decoder/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f9575dfa6c2535ad65733d6257d17a7b1e1b54b7eafb251ce9556595f3bc0c9
3
+ size 3126071088
model_index.json CHANGED
@@ -1,20 +1,12 @@
1
  {
2
- "_class_name": "StableCascadePriorPipeline",
3
  "_diffusers_version": "0.30.2",
4
- "_name_or_path": "stabilityai/stable-cascade-prior",
5
- "feature_extractor": [
6
- "transformers",
7
- "CLIPImageProcessor"
8
- ],
9
- "image_encoder": [
10
- "transformers",
11
- "CLIPVisionModelWithProjection"
12
- ],
13
- "prior": [
14
  "diffusers",
15
  "StableCascadeUNet"
16
  ],
17
- "resolution_multiple": 42.67,
18
  "scheduler": [
19
  "diffusers",
20
  "DDPMWuerstchenScheduler"
@@ -26,5 +18,9 @@
26
  "tokenizer": [
27
  "transformers",
28
  "CLIPTokenizerFast"
 
 
 
 
29
  ]
30
  }
 
1
  {
2
+ "_class_name": "StableCascadeDecoderPipeline",
3
  "_diffusers_version": "0.30.2",
4
+ "_name_or_path": "stabilityai/stable-cascade",
5
+ "decoder": [
 
 
 
 
 
 
 
 
6
  "diffusers",
7
  "StableCascadeUNet"
8
  ],
9
+ "latent_dim_scale": 10.67,
10
  "scheduler": [
11
  "diffusers",
12
  "DDPMWuerstchenScheduler"
 
18
  "tokenizer": [
19
  "transformers",
20
  "CLIPTokenizerFast"
21
+ ],
22
+ "vqgan": [
23
+ "wuerstchen",
24
+ "PaellaVQModel"
25
  ]
26
  }
text_encoder/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/root/.cache/huggingface/hub/models--stabilityai--stable-cascade-prior/snapshots/7ca32c21c3b4d4e35bbb94fcfedfb4fa2259bd91/text_encoder",
3
  "architectures": [
4
  "CLIPTextModelWithProjection"
5
  ],
 
1
  {
2
+ "_name_or_path": "/root/.cache/huggingface/hub/models--stabilityai--stable-cascade/snapshots/a89f66d459ae653e3b4d4f992a7c3789d0dc4d16/text_encoder",
3
  "architectures": [
4
  "CLIPTextModelWithProjection"
5
  ],
vqgan/config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "PaellaVQModel",
3
+ "_diffusers_version": "0.30.2",
4
+ "_name_or_path": "/root/.cache/huggingface/hub/models--stabilityai--stable-cascade/snapshots/a89f66d459ae653e3b4d4f992a7c3789d0dc4d16/vqgan",
5
+ "bottleneck_blocks": 12,
6
+ "embed_dim": 384,
7
+ "in_channels": 3,
8
+ "latent_channels": 4,
9
+ "levels": 2,
10
+ "num_vq_embeddings": 8192,
11
+ "out_channels": 3,
12
+ "scale_factor": 0.3764,
13
+ "up_down_scale_factor": 2
14
+ }
vqgan/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ac32fab5177329dac907b2480c8c00aeefc712dfd92c2d52263a9c64b426b26
3
+ size 36825828