gcpai
#4
by
yang778
- opened
- .gitattributes +0 -1
- README.md +0 -11
- autoencoder_fix_kl-f8-trinart_characters.ckpt → autoencoder_kl-f8-trinart_characters.ckpt +1 -1
- feature_extractor/preprocessor_config.json +0 -28
- model_index.json +0 -33
- safety_checker/config.json +0 -181
- safety_checker/pytorch_model.bin +0 -3
- scheduler/scheduler_config.json +0 -14
- text_encoder/config.json +0 -25
- text_encoder/pytorch_model.bin +0 -3
- tokenizer/merges.txt +0 -0
- tokenizer/special_tokens_map.json +0 -24
- tokenizer/tokenizer_config.json +0 -34
- tokenizer/vocab.json +0 -0
- unet/config.json +0 -44
- unet/diffusion_pytorch_model.bin +0 -3
- vae/config.json +0 -30
- vae/diffusion_pytorch_model.bin +0 -3
.gitattributes
CHANGED
@@ -32,4 +32,3 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
33 |
trinart_characters_it4_v1.ckpt filter=lfs diff=lfs merge=lfs -text
|
34 |
autoencoder_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
|
35 |
-
autoencoder_fix_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
|
32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
33 |
trinart_characters_it4_v1.ckpt filter=lfs diff=lfs merge=lfs -text
|
34 |
autoencoder_kl-f8-trinart_characters.ckpt filter=lfs diff=lfs merge=lfs -text
|
|
README.md
CHANGED
@@ -7,15 +7,6 @@ tags:
|
|
7 |
license: creativeml-openrail-m
|
8 |
---
|
9 |
|
10 |
-
|
11 |
-
|
12 |
-
## Note
|
13 |
-
|
14 |
-
A newer version of this model has been released:
|
15 |
-
https://huggingface.co/naclbit/trinart_derrida_characters_v2_stable_diffusion
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
## Stable Diffusion TrinArt Characters model v1
|
20 |
|
21 |
trinart_characters_19.2m_stable_diffusion_v1 is a stable diffusion v1-based model trained by roughly 19.2M anime/manga style images (pre-rolled augmented images included) plus final finetuning by about 50,000 images. This model seeks for a sweet spot between artistic style versatility and anatomical quality within the given model spec of SDv1.
|
@@ -28,8 +19,6 @@ This is the same version 1 model that was released in AI Novelist/TrinArt servic
|
|
28 |
|
29 |
#### Custom autoencoder
|
30 |
|
31 |
-
*Note: There was a wrong checkpoint uploaded before 5 Nov 2022. The file has been replaced with the latest checkpoint.*
|
32 |
-
|
33 |
We also provide a separate checkpoint for the custom KL autoencoder. As suggested by the Latent Diffusion paper, we found that training the autoencoder and the latent diffusion model separately improves the result. Since the official stable diffusion script does not support loading the other VAE, in order to run it in your script, you'll need to override state_dict for first_stage_model.
|
34 |
|
35 |
The popular WebUI has the script to load separate first_stage_model parameters.
|
|
|
7 |
license: creativeml-openrail-m
|
8 |
---
|
9 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
## Stable Diffusion TrinArt Characters model v1
|
11 |
|
12 |
trinart_characters_19.2m_stable_diffusion_v1 is a stable diffusion v1-based model trained by roughly 19.2M anime/manga style images (pre-rolled augmented images included) plus final finetuning by about 50,000 images. This model seeks for a sweet spot between artistic style versatility and anatomical quality within the given model spec of SDv1.
|
|
|
19 |
|
20 |
#### Custom autoencoder
|
21 |
|
|
|
|
|
22 |
We also provide a separate checkpoint for the custom KL autoencoder. As suggested by the Latent Diffusion paper, we found that training the autoencoder and the latent diffusion model separately improves the result. Since the official stable diffusion script does not support loading the other VAE, in order to run it in your script, you'll need to override state_dict for first_stage_model.
|
23 |
|
24 |
The popular WebUI has the script to load separate first_stage_model parameters.
|
autoencoder_fix_kl-f8-trinart_characters.ckpt → autoencoder_kl-f8-trinart_characters.ckpt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 404661793
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2dd1c82220e31a72bd9958dda249ed7f94faf875d5123ae3aab7a1950a82a8f
|
3 |
size 404661793
|
feature_extractor/preprocessor_config.json
DELETED
@@ -1,28 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"crop_size": {
|
3 |
-
"height": 224,
|
4 |
-
"width": 224
|
5 |
-
},
|
6 |
-
"do_center_crop": true,
|
7 |
-
"do_convert_rgb": true,
|
8 |
-
"do_normalize": true,
|
9 |
-
"do_rescale": true,
|
10 |
-
"do_resize": true,
|
11 |
-
"feature_extractor_type": "CLIPFeatureExtractor",
|
12 |
-
"image_mean": [
|
13 |
-
0.48145466,
|
14 |
-
0.4578275,
|
15 |
-
0.40821073
|
16 |
-
],
|
17 |
-
"image_processor_type": "CLIPFeatureExtractor",
|
18 |
-
"image_std": [
|
19 |
-
0.26862954,
|
20 |
-
0.26130258,
|
21 |
-
0.27577711
|
22 |
-
],
|
23 |
-
"resample": 3,
|
24 |
-
"rescale_factor": 0.00392156862745098,
|
25 |
-
"size": {
|
26 |
-
"shortest_edge": 224
|
27 |
-
}
|
28 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
model_index.json
DELETED
@@ -1,33 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_class_name": "StableDiffusionPipeline",
|
3 |
-
"_diffusers_version": "0.12.0.dev0",
|
4 |
-
"feature_extractor": [
|
5 |
-
"transformers",
|
6 |
-
"CLIPImageProcessor"
|
7 |
-
],
|
8 |
-
"requires_safety_checker": true,
|
9 |
-
"safety_checker": [
|
10 |
-
"stable_diffusion",
|
11 |
-
"StableDiffusionSafetyChecker"
|
12 |
-
],
|
13 |
-
"scheduler": [
|
14 |
-
"diffusers",
|
15 |
-
"PNDMScheduler"
|
16 |
-
],
|
17 |
-
"text_encoder": [
|
18 |
-
"transformers",
|
19 |
-
"CLIPTextModel"
|
20 |
-
],
|
21 |
-
"tokenizer": [
|
22 |
-
"transformers",
|
23 |
-
"CLIPTokenizer"
|
24 |
-
],
|
25 |
-
"unet": [
|
26 |
-
"diffusers",
|
27 |
-
"UNet2DConditionModel"
|
28 |
-
],
|
29 |
-
"vae": [
|
30 |
-
"diffusers",
|
31 |
-
"AutoencoderKL"
|
32 |
-
]
|
33 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
safety_checker/config.json
DELETED
@@ -1,181 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_commit_hash": "cb41f3a270d63d454d385fc2e4f571c487c253c5",
|
3 |
-
"_name_or_path": "CompVis/stable-diffusion-safety-checker",
|
4 |
-
"architectures": [
|
5 |
-
"StableDiffusionSafetyChecker"
|
6 |
-
],
|
7 |
-
"initializer_factor": 1.0,
|
8 |
-
"logit_scale_init_value": 2.6592,
|
9 |
-
"model_type": "clip",
|
10 |
-
"projection_dim": 768,
|
11 |
-
"text_config": {
|
12 |
-
"_name_or_path": "",
|
13 |
-
"add_cross_attention": false,
|
14 |
-
"architectures": null,
|
15 |
-
"attention_dropout": 0.0,
|
16 |
-
"bad_words_ids": null,
|
17 |
-
"begin_suppress_tokens": null,
|
18 |
-
"bos_token_id": 0,
|
19 |
-
"chunk_size_feed_forward": 0,
|
20 |
-
"cross_attention_hidden_size": null,
|
21 |
-
"decoder_start_token_id": null,
|
22 |
-
"diversity_penalty": 0.0,
|
23 |
-
"do_sample": false,
|
24 |
-
"dropout": 0.0,
|
25 |
-
"early_stopping": false,
|
26 |
-
"encoder_no_repeat_ngram_size": 0,
|
27 |
-
"eos_token_id": 2,
|
28 |
-
"exponential_decay_length_penalty": null,
|
29 |
-
"finetuning_task": null,
|
30 |
-
"forced_bos_token_id": null,
|
31 |
-
"forced_eos_token_id": null,
|
32 |
-
"hidden_act": "quick_gelu",
|
33 |
-
"hidden_size": 768,
|
34 |
-
"id2label": {
|
35 |
-
"0": "LABEL_0",
|
36 |
-
"1": "LABEL_1"
|
37 |
-
},
|
38 |
-
"initializer_factor": 1.0,
|
39 |
-
"initializer_range": 0.02,
|
40 |
-
"intermediate_size": 3072,
|
41 |
-
"is_decoder": false,
|
42 |
-
"is_encoder_decoder": false,
|
43 |
-
"label2id": {
|
44 |
-
"LABEL_0": 0,
|
45 |
-
"LABEL_1": 1
|
46 |
-
},
|
47 |
-
"layer_norm_eps": 1e-05,
|
48 |
-
"length_penalty": 1.0,
|
49 |
-
"max_length": 20,
|
50 |
-
"max_position_embeddings": 77,
|
51 |
-
"min_length": 0,
|
52 |
-
"model_type": "clip_text_model",
|
53 |
-
"no_repeat_ngram_size": 0,
|
54 |
-
"num_attention_heads": 12,
|
55 |
-
"num_beam_groups": 1,
|
56 |
-
"num_beams": 1,
|
57 |
-
"num_hidden_layers": 12,
|
58 |
-
"num_return_sequences": 1,
|
59 |
-
"output_attentions": false,
|
60 |
-
"output_hidden_states": false,
|
61 |
-
"output_scores": false,
|
62 |
-
"pad_token_id": 1,
|
63 |
-
"prefix": null,
|
64 |
-
"problem_type": null,
|
65 |
-
"projection_dim": 512,
|
66 |
-
"pruned_heads": {},
|
67 |
-
"remove_invalid_values": false,
|
68 |
-
"repetition_penalty": 1.0,
|
69 |
-
"return_dict": true,
|
70 |
-
"return_dict_in_generate": false,
|
71 |
-
"sep_token_id": null,
|
72 |
-
"suppress_tokens": null,
|
73 |
-
"task_specific_params": null,
|
74 |
-
"temperature": 1.0,
|
75 |
-
"tf_legacy_loss": false,
|
76 |
-
"tie_encoder_decoder": false,
|
77 |
-
"tie_word_embeddings": true,
|
78 |
-
"tokenizer_class": null,
|
79 |
-
"top_k": 50,
|
80 |
-
"top_p": 1.0,
|
81 |
-
"torch_dtype": null,
|
82 |
-
"torchscript": false,
|
83 |
-
"transformers_version": "4.26.0.dev0",
|
84 |
-
"typical_p": 1.0,
|
85 |
-
"use_bfloat16": false,
|
86 |
-
"vocab_size": 49408
|
87 |
-
},
|
88 |
-
"text_config_dict": {
|
89 |
-
"hidden_size": 768,
|
90 |
-
"intermediate_size": 3072,
|
91 |
-
"num_attention_heads": 12,
|
92 |
-
"num_hidden_layers": 12
|
93 |
-
},
|
94 |
-
"torch_dtype": "float32",
|
95 |
-
"transformers_version": null,
|
96 |
-
"vision_config": {
|
97 |
-
"_name_or_path": "",
|
98 |
-
"add_cross_attention": false,
|
99 |
-
"architectures": null,
|
100 |
-
"attention_dropout": 0.0,
|
101 |
-
"bad_words_ids": null,
|
102 |
-
"begin_suppress_tokens": null,
|
103 |
-
"bos_token_id": null,
|
104 |
-
"chunk_size_feed_forward": 0,
|
105 |
-
"cross_attention_hidden_size": null,
|
106 |
-
"decoder_start_token_id": null,
|
107 |
-
"diversity_penalty": 0.0,
|
108 |
-
"do_sample": false,
|
109 |
-
"dropout": 0.0,
|
110 |
-
"early_stopping": false,
|
111 |
-
"encoder_no_repeat_ngram_size": 0,
|
112 |
-
"eos_token_id": null,
|
113 |
-
"exponential_decay_length_penalty": null,
|
114 |
-
"finetuning_task": null,
|
115 |
-
"forced_bos_token_id": null,
|
116 |
-
"forced_eos_token_id": null,
|
117 |
-
"hidden_act": "quick_gelu",
|
118 |
-
"hidden_size": 1024,
|
119 |
-
"id2label": {
|
120 |
-
"0": "LABEL_0",
|
121 |
-
"1": "LABEL_1"
|
122 |
-
},
|
123 |
-
"image_size": 224,
|
124 |
-
"initializer_factor": 1.0,
|
125 |
-
"initializer_range": 0.02,
|
126 |
-
"intermediate_size": 4096,
|
127 |
-
"is_decoder": false,
|
128 |
-
"is_encoder_decoder": false,
|
129 |
-
"label2id": {
|
130 |
-
"LABEL_0": 0,
|
131 |
-
"LABEL_1": 1
|
132 |
-
},
|
133 |
-
"layer_norm_eps": 1e-05,
|
134 |
-
"length_penalty": 1.0,
|
135 |
-
"max_length": 20,
|
136 |
-
"min_length": 0,
|
137 |
-
"model_type": "clip_vision_model",
|
138 |
-
"no_repeat_ngram_size": 0,
|
139 |
-
"num_attention_heads": 16,
|
140 |
-
"num_beam_groups": 1,
|
141 |
-
"num_beams": 1,
|
142 |
-
"num_channels": 3,
|
143 |
-
"num_hidden_layers": 24,
|
144 |
-
"num_return_sequences": 1,
|
145 |
-
"output_attentions": false,
|
146 |
-
"output_hidden_states": false,
|
147 |
-
"output_scores": false,
|
148 |
-
"pad_token_id": null,
|
149 |
-
"patch_size": 14,
|
150 |
-
"prefix": null,
|
151 |
-
"problem_type": null,
|
152 |
-
"projection_dim": 512,
|
153 |
-
"pruned_heads": {},
|
154 |
-
"remove_invalid_values": false,
|
155 |
-
"repetition_penalty": 1.0,
|
156 |
-
"return_dict": true,
|
157 |
-
"return_dict_in_generate": false,
|
158 |
-
"sep_token_id": null,
|
159 |
-
"suppress_tokens": null,
|
160 |
-
"task_specific_params": null,
|
161 |
-
"temperature": 1.0,
|
162 |
-
"tf_legacy_loss": false,
|
163 |
-
"tie_encoder_decoder": false,
|
164 |
-
"tie_word_embeddings": true,
|
165 |
-
"tokenizer_class": null,
|
166 |
-
"top_k": 50,
|
167 |
-
"top_p": 1.0,
|
168 |
-
"torch_dtype": null,
|
169 |
-
"torchscript": false,
|
170 |
-
"transformers_version": "4.26.0.dev0",
|
171 |
-
"typical_p": 1.0,
|
172 |
-
"use_bfloat16": false
|
173 |
-
},
|
174 |
-
"vision_config_dict": {
|
175 |
-
"hidden_size": 1024,
|
176 |
-
"intermediate_size": 4096,
|
177 |
-
"num_attention_heads": 16,
|
178 |
-
"num_hidden_layers": 24,
|
179 |
-
"patch_size": 14
|
180 |
-
}
|
181 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
safety_checker/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:16d28f2b37109f222cdc33620fdd262102ac32112be0352a7f77e9614b35a394
|
3 |
-
size 1216064769
|
|
|
|
|
|
|
|
scheduler/scheduler_config.json
DELETED
@@ -1,14 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_class_name": "PNDMScheduler",
|
3 |
-
"_diffusers_version": "0.12.0.dev0",
|
4 |
-
"beta_end": 0.012,
|
5 |
-
"beta_schedule": "scaled_linear",
|
6 |
-
"beta_start": 0.00085,
|
7 |
-
"clip_sample": false,
|
8 |
-
"num_train_timesteps": 1000,
|
9 |
-
"prediction_type": "epsilon",
|
10 |
-
"set_alpha_to_one": false,
|
11 |
-
"skip_prk_steps": true,
|
12 |
-
"steps_offset": 1,
|
13 |
-
"trained_betas": null
|
14 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_encoder/config.json
DELETED
@@ -1,25 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_name_or_path": "openai/clip-vit-large-patch14",
|
3 |
-
"architectures": [
|
4 |
-
"CLIPTextModel"
|
5 |
-
],
|
6 |
-
"attention_dropout": 0.0,
|
7 |
-
"bos_token_id": 0,
|
8 |
-
"dropout": 0.0,
|
9 |
-
"eos_token_id": 2,
|
10 |
-
"hidden_act": "quick_gelu",
|
11 |
-
"hidden_size": 768,
|
12 |
-
"initializer_factor": 1.0,
|
13 |
-
"initializer_range": 0.02,
|
14 |
-
"intermediate_size": 3072,
|
15 |
-
"layer_norm_eps": 1e-05,
|
16 |
-
"max_position_embeddings": 77,
|
17 |
-
"model_type": "clip_text_model",
|
18 |
-
"num_attention_heads": 12,
|
19 |
-
"num_hidden_layers": 12,
|
20 |
-
"pad_token_id": 1,
|
21 |
-
"projection_dim": 768,
|
22 |
-
"torch_dtype": "float32",
|
23 |
-
"transformers_version": "4.26.0.dev0",
|
24 |
-
"vocab_size": 49408
|
25 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
text_encoder/pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:aad0e7cec126b7ee2a36e52fef25ffc4a8c41ff0b2c7a1cd07f5e693680edab5
|
3 |
-
size 492307041
|
|
|
|
|
|
|
|
tokenizer/merges.txt
DELETED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer/special_tokens_map.json
DELETED
@@ -1,24 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"bos_token": {
|
3 |
-
"content": "<|startoftext|>",
|
4 |
-
"lstrip": false,
|
5 |
-
"normalized": true,
|
6 |
-
"rstrip": false,
|
7 |
-
"single_word": false
|
8 |
-
},
|
9 |
-
"eos_token": {
|
10 |
-
"content": "<|endoftext|>",
|
11 |
-
"lstrip": false,
|
12 |
-
"normalized": true,
|
13 |
-
"rstrip": false,
|
14 |
-
"single_word": false
|
15 |
-
},
|
16 |
-
"pad_token": "<|endoftext|>",
|
17 |
-
"unk_token": {
|
18 |
-
"content": "<|endoftext|>",
|
19 |
-
"lstrip": false,
|
20 |
-
"normalized": true,
|
21 |
-
"rstrip": false,
|
22 |
-
"single_word": false
|
23 |
-
}
|
24 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/tokenizer_config.json
DELETED
@@ -1,34 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"add_prefix_space": false,
|
3 |
-
"bos_token": {
|
4 |
-
"__type": "AddedToken",
|
5 |
-
"content": "<|startoftext|>",
|
6 |
-
"lstrip": false,
|
7 |
-
"normalized": true,
|
8 |
-
"rstrip": false,
|
9 |
-
"single_word": false
|
10 |
-
},
|
11 |
-
"do_lower_case": true,
|
12 |
-
"eos_token": {
|
13 |
-
"__type": "AddedToken",
|
14 |
-
"content": "<|endoftext|>",
|
15 |
-
"lstrip": false,
|
16 |
-
"normalized": true,
|
17 |
-
"rstrip": false,
|
18 |
-
"single_word": false
|
19 |
-
},
|
20 |
-
"errors": "replace",
|
21 |
-
"model_max_length": 77,
|
22 |
-
"name_or_path": "openai/clip-vit-large-patch14",
|
23 |
-
"pad_token": "<|endoftext|>",
|
24 |
-
"special_tokens_map_file": "./special_tokens_map.json",
|
25 |
-
"tokenizer_class": "CLIPTokenizer",
|
26 |
-
"unk_token": {
|
27 |
-
"__type": "AddedToken",
|
28 |
-
"content": "<|endoftext|>",
|
29 |
-
"lstrip": false,
|
30 |
-
"normalized": true,
|
31 |
-
"rstrip": false,
|
32 |
-
"single_word": false
|
33 |
-
}
|
34 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tokenizer/vocab.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|
unet/config.json
DELETED
@@ -1,44 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_class_name": "UNet2DConditionModel",
|
3 |
-
"_diffusers_version": "0.12.0.dev0",
|
4 |
-
"act_fn": "silu",
|
5 |
-
"attention_head_dim": 8,
|
6 |
-
"block_out_channels": [
|
7 |
-
320,
|
8 |
-
640,
|
9 |
-
1280,
|
10 |
-
1280
|
11 |
-
],
|
12 |
-
"center_input_sample": false,
|
13 |
-
"class_embed_type": null,
|
14 |
-
"cross_attention_dim": 768,
|
15 |
-
"down_block_types": [
|
16 |
-
"CrossAttnDownBlock2D",
|
17 |
-
"CrossAttnDownBlock2D",
|
18 |
-
"CrossAttnDownBlock2D",
|
19 |
-
"DownBlock2D"
|
20 |
-
],
|
21 |
-
"downsample_padding": 1,
|
22 |
-
"dual_cross_attention": false,
|
23 |
-
"flip_sin_to_cos": true,
|
24 |
-
"freq_shift": 0,
|
25 |
-
"in_channels": 4,
|
26 |
-
"layers_per_block": 2,
|
27 |
-
"mid_block_scale_factor": 1,
|
28 |
-
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
29 |
-
"norm_eps": 1e-05,
|
30 |
-
"norm_num_groups": 32,
|
31 |
-
"num_class_embeds": null,
|
32 |
-
"only_cross_attention": false,
|
33 |
-
"out_channels": 4,
|
34 |
-
"resnet_time_scale_shift": "default",
|
35 |
-
"sample_size": 64,
|
36 |
-
"up_block_types": [
|
37 |
-
"UpBlock2D",
|
38 |
-
"CrossAttnUpBlock2D",
|
39 |
-
"CrossAttnUpBlock2D",
|
40 |
-
"CrossAttnUpBlock2D"
|
41 |
-
],
|
42 |
-
"upcast_attention": false,
|
43 |
-
"use_linear_projection": false
|
44 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
unet/diffusion_pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:22975eb114b3a1b085d0e4f199210aad32a6ec1a85547d48f2e4a6f4c9410c8b
|
3 |
-
size 3438366373
|
|
|
|
|
|
|
|
vae/config.json
DELETED
@@ -1,30 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"_class_name": "AutoencoderKL",
|
3 |
-
"_diffusers_version": "0.12.0.dev0",
|
4 |
-
"act_fn": "silu",
|
5 |
-
"block_out_channels": [
|
6 |
-
128,
|
7 |
-
256,
|
8 |
-
512,
|
9 |
-
512
|
10 |
-
],
|
11 |
-
"down_block_types": [
|
12 |
-
"DownEncoderBlock2D",
|
13 |
-
"DownEncoderBlock2D",
|
14 |
-
"DownEncoderBlock2D",
|
15 |
-
"DownEncoderBlock2D"
|
16 |
-
],
|
17 |
-
"in_channels": 3,
|
18 |
-
"latent_channels": 4,
|
19 |
-
"layers_per_block": 2,
|
20 |
-
"norm_num_groups": 32,
|
21 |
-
"out_channels": 3,
|
22 |
-
"sample_size": 512,
|
23 |
-
"scaling_factor": 0.18215,
|
24 |
-
"up_block_types": [
|
25 |
-
"UpDecoderBlock2D",
|
26 |
-
"UpDecoderBlock2D",
|
27 |
-
"UpDecoderBlock2D",
|
28 |
-
"UpDecoderBlock2D"
|
29 |
-
]
|
30 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
vae/diffusion_pytorch_model.bin
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:6723bacd3c60b11a2b4e6007338a54c6964c210116c3ccecb3bfc80e218afc8f
|
3 |
-
size 334711857
|
|
|
|
|
|
|
|