update diffusers weights
Browse files- image_encoder/config.json +3 -3
- image_encoder/model.safetensors +2 -2
- model_index.json +3 -4
- prior/config.json +39 -36
- prior/diffusion_pytorch_model.safetensors +2 -2
- scheduler/scheduler_config.json +1 -1
- text_encoder/config.json +3 -3
- text_encoder/model.safetensors +2 -2
- tokenizer/tokenizer.json +2 -16
image_encoder/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"CLIPVisionModelWithProjection"
|
5 |
],
|
@@ -18,6 +18,6 @@
|
|
18 |
"num_hidden_layers": 24,
|
19 |
"patch_size": 14,
|
20 |
"projection_dim": 768,
|
21 |
-
"torch_dtype": "
|
22 |
-
"transformers_version": "4.38.
|
23 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "openai/clip-vit-large-patch14",
|
3 |
"architectures": [
|
4 |
"CLIPVisionModelWithProjection"
|
5 |
],
|
|
|
18 |
"num_hidden_layers": 24,
|
19 |
"patch_size": 14,
|
20 |
"projection_dim": 768,
|
21 |
+
"torch_dtype": "float32",
|
22 |
+
"transformers_version": "4.38.2"
|
23 |
}
|
image_encoder/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:77b33d2a3a643650857672e880ccf73adbaf114fbbadec36d142ee9d48af7e20
|
3 |
+
size 1215912728
|
model_index.json
CHANGED
@@ -1,7 +1,6 @@
|
|
1 |
{
|
2 |
"_class_name": "StableCascadePriorPipeline",
|
3 |
-
"_diffusers_version": "0.
|
4 |
-
"_name_or_path": "StableCascade-prior/",
|
5 |
"feature_extractor": [
|
6 |
"transformers",
|
7 |
"CLIPImageProcessor"
|
@@ -11,8 +10,8 @@
|
|
11 |
"CLIPVisionModelWithProjection"
|
12 |
],
|
13 |
"prior": [
|
14 |
-
"
|
15 |
-
"
|
16 |
],
|
17 |
"resolution_multiple": 42.67,
|
18 |
"scheduler": [
|
|
|
1 |
{
|
2 |
"_class_name": "StableCascadePriorPipeline",
|
3 |
+
"_diffusers_version": "0.27.0.dev0",
|
|
|
4 |
"feature_extractor": [
|
5 |
"transformers",
|
6 |
"CLIPImageProcessor"
|
|
|
10 |
"CLIPVisionModelWithProjection"
|
11 |
],
|
12 |
"prior": [
|
13 |
+
"diffusers",
|
14 |
+
"StableCascadeUNet"
|
15 |
],
|
16 |
"resolution_multiple": 42.67,
|
17 |
"scheduler": [
|
prior/config.json
CHANGED
@@ -1,61 +1,64 @@
|
|
1 |
{
|
2 |
-
"_class_name": "
|
3 |
-
"_diffusers_version": "0.
|
4 |
-
"
|
5 |
-
|
6 |
-
|
7 |
-
1,
|
8 |
-
1
|
9 |
-
],
|
10 |
-
[
|
11 |
-
1,
|
12 |
-
1
|
13 |
-
]
|
14 |
],
|
15 |
-
"
|
16 |
[
|
17 |
-
|
18 |
-
|
|
|
19 |
],
|
20 |
[
|
21 |
-
|
22 |
-
|
|
|
23 |
]
|
24 |
],
|
25 |
-
"
|
26 |
-
"
|
27 |
-
"
|
28 |
-
"
|
29 |
-
"
|
30 |
-
"
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
34 |
],
|
35 |
-
"c_in": 16,
|
36 |
-
"c_out": 16,
|
37 |
-
"c_pixels": null,
|
38 |
-
"c_r": 64,
|
39 |
"dropout": [
|
40 |
0.1,
|
41 |
0.1
|
42 |
],
|
|
|
|
|
43 |
"kernel_size": 3,
|
44 |
-
"
|
45 |
-
"CTA",
|
46 |
-
"CTA"
|
47 |
-
],
|
48 |
-
"nhead": [
|
49 |
32,
|
50 |
32
|
51 |
],
|
|
|
52 |
"patch_size": 1,
|
|
|
53 |
"self_attn": true,
|
54 |
"switch_level": [
|
55 |
false
|
56 |
],
|
57 |
-
"
|
58 |
"sca",
|
59 |
"crp"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
60 |
]
|
61 |
}
|
|
|
1 |
{
|
2 |
+
"_class_name": "StableCascadeUNet",
|
3 |
+
"_diffusers_version": "0.27.0.dev0",
|
4 |
+
"block_out_channels": [
|
5 |
+
2048,
|
6 |
+
2048
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
7 |
],
|
8 |
+
"block_types_per_layer": [
|
9 |
[
|
10 |
+
"SDCascadeResBlock",
|
11 |
+
"SDCascadeTimestepBlock",
|
12 |
+
"SDCascadeAttnBlock"
|
13 |
],
|
14 |
[
|
15 |
+
"SDCascadeResBlock",
|
16 |
+
"SDCascadeTimestepBlock",
|
17 |
+
"SDCascadeAttnBlock"
|
18 |
]
|
19 |
],
|
20 |
+
"clip_image_in_channels": 768,
|
21 |
+
"clip_seq": 4,
|
22 |
+
"clip_text_in_channels": 1280,
|
23 |
+
"clip_text_pooled_in_channels": 1280,
|
24 |
+
"conditioning_dim": 2048,
|
25 |
+
"down_blocks_repeat_mappers": [
|
26 |
+
1,
|
27 |
+
1
|
28 |
+
],
|
29 |
+
"down_num_layers_per_block": [
|
30 |
+
8,
|
31 |
+
24
|
32 |
],
|
|
|
|
|
|
|
|
|
33 |
"dropout": [
|
34 |
0.1,
|
35 |
0.1
|
36 |
],
|
37 |
+
"effnet_in_channels": null,
|
38 |
+
"in_channels": 16,
|
39 |
"kernel_size": 3,
|
40 |
+
"num_attention_heads": [
|
|
|
|
|
|
|
|
|
41 |
32,
|
42 |
32
|
43 |
],
|
44 |
+
"out_channels": 16,
|
45 |
"patch_size": 1,
|
46 |
+
"pixel_mapper_in_channels": null,
|
47 |
"self_attn": true,
|
48 |
"switch_level": [
|
49 |
false
|
50 |
],
|
51 |
+
"timestep_conditioning_type": [
|
52 |
"sca",
|
53 |
"crp"
|
54 |
+
],
|
55 |
+
"timestep_ratio_embedding_dim": 64,
|
56 |
+
"up_blocks_repeat_mappers": [
|
57 |
+
1,
|
58 |
+
1
|
59 |
+
],
|
60 |
+
"up_num_layers_per_block": [
|
61 |
+
24,
|
62 |
+
8
|
63 |
]
|
64 |
}
|
prior/diffusion_pytorch_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a2c7aa62c503780b85f74fd513b1b99c12ea4f83422bdbad5ac264aa68efb4b
|
3 |
+
size 14356584672
|
scheduler/scheduler_config.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"_class_name": "DDPMWuerstchenScheduler",
|
3 |
-
"_diffusers_version": "0.
|
4 |
"s": 0.008,
|
5 |
"scaler": 1.0
|
6 |
}
|
|
|
1 |
{
|
2 |
"_class_name": "DDPMWuerstchenScheduler",
|
3 |
+
"_diffusers_version": "0.27.0.dev0",
|
4 |
"s": 0.008,
|
5 |
"scaler": 1.0
|
6 |
}
|
text_encoder/config.json
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "
|
3 |
"architectures": [
|
4 |
"CLIPTextModelWithProjection"
|
5 |
],
|
@@ -19,7 +19,7 @@
|
|
19 |
"num_hidden_layers": 32,
|
20 |
"pad_token_id": 1,
|
21 |
"projection_dim": 1280,
|
22 |
-
"torch_dtype": "
|
23 |
-
"transformers_version": "4.38.
|
24 |
"vocab_size": 49408
|
25 |
}
|
|
|
1 |
{
|
2 |
+
"_name_or_path": "laion/CLIP-ViT-bigG-14-laion2B-39B-b160k",
|
3 |
"architectures": [
|
4 |
"CLIPTextModelWithProjection"
|
5 |
],
|
|
|
19 |
"num_hidden_layers": 32,
|
20 |
"pad_token_id": 1,
|
21 |
"projection_dim": 1280,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.38.2",
|
24 |
"vocab_size": 49408
|
25 |
}
|
text_encoder/model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa5b2e6f4c2efc2d82e4b8312faec1a5540eabfc6415126c9a05c8436a530ef4
|
3 |
+
size 2778702264
|
tokenizer/tokenizer.json
CHANGED
@@ -1,21 +1,7 @@
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
-
"truncation":
|
4 |
-
|
5 |
-
"max_length": 77,
|
6 |
-
"strategy": "LongestFirst",
|
7 |
-
"stride": 0
|
8 |
-
},
|
9 |
-
"padding": {
|
10 |
-
"strategy": {
|
11 |
-
"Fixed": 77
|
12 |
-
},
|
13 |
-
"direction": "Right",
|
14 |
-
"pad_to_multiple_of": null,
|
15 |
-
"pad_id": 49407,
|
16 |
-
"pad_type_id": 0,
|
17 |
-
"pad_token": "<|endoftext|>"
|
18 |
-
},
|
19 |
"added_tokens": [
|
20 |
{
|
21 |
"id": 49406,
|
|
|
1 |
{
|
2 |
"version": "1.0",
|
3 |
+
"truncation": null,
|
4 |
+
"padding": null,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
5 |
"added_tokens": [
|
6 |
{
|
7 |
"id": 49406,
|