BlackSadadou
committed on
Commit
•
adfff27
1
Parent(s):
9654474
End of training
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- README.md +66 -0
- checkpoint-1000/optimizer.bin +3 -0
- checkpoint-1000/random_states_0.pkl +3 -0
- checkpoint-1000/scaler.pt +3 -0
- checkpoint-1000/scheduler.bin +3 -0
- checkpoint-1000/unet/config.json +68 -0
- checkpoint-1000/unet/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-1000/unet_ema/config.json +75 -0
- checkpoint-1000/unet_ema/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-1500/optimizer.bin +3 -0
- checkpoint-1500/random_states_0.pkl +3 -0
- checkpoint-1500/scaler.pt +3 -0
- checkpoint-1500/scheduler.bin +3 -0
- checkpoint-1500/unet/config.json +68 -0
- checkpoint-1500/unet/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-1500/unet_ema/config.json +75 -0
- checkpoint-1500/unet_ema/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-2000/optimizer.bin +3 -0
- checkpoint-2000/random_states_0.pkl +3 -0
- checkpoint-2000/scaler.pt +3 -0
- checkpoint-2000/scheduler.bin +3 -0
- checkpoint-2000/unet/config.json +68 -0
- checkpoint-2000/unet/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-2000/unet_ema/config.json +75 -0
- checkpoint-2000/unet_ema/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-2500/optimizer.bin +3 -0
- checkpoint-2500/random_states_0.pkl +3 -0
- checkpoint-2500/scaler.pt +3 -0
- checkpoint-2500/scheduler.bin +3 -0
- checkpoint-2500/unet/config.json +68 -0
- checkpoint-2500/unet/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-2500/unet_ema/config.json +75 -0
- checkpoint-2500/unet_ema/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-500/optimizer.bin +3 -0
- checkpoint-500/random_states_0.pkl +3 -0
- checkpoint-500/scaler.pt +3 -0
- checkpoint-500/scheduler.bin +3 -0
- checkpoint-500/unet/config.json +68 -0
- checkpoint-500/unet/diffusion_pytorch_model.safetensors +3 -0
- checkpoint-500/unet_ema/config.json +75 -0
- checkpoint-500/unet_ema/diffusion_pytorch_model.safetensors +3 -0
- feature_extractor/preprocessor_config.json +27 -0
- logs/text2image-fine-tune/1713384066.8972213/events.out.tfevents.1713384066.880f55e348cb.9865.1 +3 -0
- logs/text2image-fine-tune/1713384066.8989856/hparams.yml +51 -0
- logs/text2image-fine-tune/events.out.tfevents.1713384066.880f55e348cb.9865.0 +3 -0
- model_index.json +38 -0
- safety_checker/config.json +28 -0
- safety_checker/model.safetensors +3 -0
- scheduler/scheduler_config.json +15 -0
- text_encoder/config.json +25 -0
README.md
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
license: creativeml-openrail-m
|
3 |
+
library_name: diffusers
|
4 |
+
tags:
|
5 |
+
- stable-diffusion
|
6 |
+
- stable-diffusion-diffusers
|
7 |
+
- text-to-image
|
8 |
+
- diffusers
|
9 |
+
- diffusers-training
|
10 |
+
inference: true
|
11 |
+
base_model: CompVis/stable-diffusion-v1-4
|
12 |
+
---
|
13 |
+
|
14 |
+
<!-- This model card has been generated automatically according to the information the training script had access to. You
|
15 |
+
should probably proofread and complete it, then remove this comment. -->
|
16 |
+
|
17 |
+
|
18 |
+
# Text-to-image finetuning - BlackSadadou/SD-Black-IMGgen-Model-v2
|
19 |
+
|
20 |
+
This pipeline was finetuned from **CompVis/stable-diffusion-v1-4** on the **BlackSadadou/STBlackIMGgen** dataset. Below are some example images generated with the finetuned pipeline using the following prompts: ["Illustration graphique d'un Chat dans le style 'BLACK's Style'."]:
|
21 |
+
|
22 |
+
![val_imgs_grid](./val_imgs_grid.png)
|
23 |
+
|
24 |
+
|
25 |
+
## Pipeline usage
|
26 |
+
|
27 |
+
You can use the pipeline like so:
|
28 |
+
|
29 |
+
```python
|
30 |
+
from diffusers import DiffusionPipeline
|
31 |
+
import torch
|
32 |
+
|
33 |
+
pipeline = DiffusionPipeline.from_pretrained("BlackSadadou/SD-Black-IMGgen-Model-v2", torch_dtype=torch.float16)
|
34 |
+
prompt = "Illustration graphique d'un Chat dans le style 'BLACK's Style'."
|
35 |
+
image = pipeline(prompt).images[0]
|
36 |
+
image.save("my_image.png")
|
37 |
+
```
|
38 |
+
|
39 |
+
## Training info
|
40 |
+
|
41 |
+
These are the key hyperparameters used during training:
|
42 |
+
|
43 |
+
* Epochs: 417
|
44 |
+
* Learning rate: 1e-05
|
45 |
+
* Batch size: 1
|
46 |
+
* Gradient accumulation steps: 4
|
47 |
+
* Image resolution: 512
|
48 |
+
* Mixed-precision: fp16
|
49 |
+
|
50 |
+
|
51 |
+
|
52 |
+
## Intended uses & limitations
|
53 |
+
|
54 |
+
#### How to use
|
55 |
+
|
56 |
+
```python
|
57 |
+
# TODO: add an example code snippet for running this diffusion pipeline
|
58 |
+
```
|
59 |
+
|
60 |
+
#### Limitations and bias
|
61 |
+
|
62 |
+
[TODO: provide examples of latent issues and potential remediations]
|
63 |
+
|
64 |
+
## Training details
|
65 |
+
|
66 |
+
[TODO: describe the data used to train the model]
|
checkpoint-1000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5d3216b0db67cbb7d2153e7cf3bebeed01ff33b71464e0029f3bd62331df0ea0
|
3 |
+
size 6876750164
|
checkpoint-1000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b52f6f702f0fa799743a01ff9710ad632d92a8b446f679284fca4e48a7a9d764
|
3 |
+
size 14344
|
checkpoint-1000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d8fdcd0311eba9854fff738038ed4c1a269832665b4d88ba4e4e3d02a1a7e0e
|
3 |
+
size 988
|
checkpoint-1000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0f160203d06dd30a0364e6f3333114f4314b27dc2c533167af3874ea59fe143
|
3 |
+
size 1000
|
checkpoint-1000/unet/config.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"down_block_types": [
|
25 |
+
"CrossAttnDownBlock2D",
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"DownBlock2D"
|
29 |
+
],
|
30 |
+
"downsample_padding": 1,
|
31 |
+
"dropout": 0.0,
|
32 |
+
"dual_cross_attention": false,
|
33 |
+
"encoder_hid_dim": null,
|
34 |
+
"encoder_hid_dim_type": null,
|
35 |
+
"flip_sin_to_cos": true,
|
36 |
+
"freq_shift": 0,
|
37 |
+
"in_channels": 4,
|
38 |
+
"layers_per_block": 2,
|
39 |
+
"mid_block_only_cross_attention": null,
|
40 |
+
"mid_block_scale_factor": 1,
|
41 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
42 |
+
"norm_eps": 1e-05,
|
43 |
+
"norm_num_groups": 32,
|
44 |
+
"num_attention_heads": null,
|
45 |
+
"num_class_embeds": null,
|
46 |
+
"only_cross_attention": false,
|
47 |
+
"out_channels": 4,
|
48 |
+
"projection_class_embeddings_input_dim": null,
|
49 |
+
"resnet_out_scale_factor": 1.0,
|
50 |
+
"resnet_skip_time_act": false,
|
51 |
+
"resnet_time_scale_shift": "default",
|
52 |
+
"reverse_transformer_layers_per_block": null,
|
53 |
+
"sample_size": 64,
|
54 |
+
"time_cond_proj_dim": null,
|
55 |
+
"time_embedding_act_fn": null,
|
56 |
+
"time_embedding_dim": null,
|
57 |
+
"time_embedding_type": "positional",
|
58 |
+
"timestep_post_act": null,
|
59 |
+
"transformer_layers_per_block": 1,
|
60 |
+
"up_block_types": [
|
61 |
+
"UpBlock2D",
|
62 |
+
"CrossAttnUpBlock2D",
|
63 |
+
"CrossAttnUpBlock2D",
|
64 |
+
"CrossAttnUpBlock2D"
|
65 |
+
],
|
66 |
+
"upcast_attention": false,
|
67 |
+
"use_linear_projection": false
|
68 |
+
}
|
checkpoint-1000/unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:253a5b12623a101347f0339de15a3589d5c819cac4ef0dc60cabdf08c3bc73e1
|
3 |
+
size 3438167536
|
checkpoint-1000/unet_ema/config.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"decay": 0.9999,
|
25 |
+
"down_block_types": [
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"CrossAttnDownBlock2D",
|
29 |
+
"DownBlock2D"
|
30 |
+
],
|
31 |
+
"downsample_padding": 1,
|
32 |
+
"dropout": 0.0,
|
33 |
+
"dual_cross_attention": false,
|
34 |
+
"encoder_hid_dim": null,
|
35 |
+
"encoder_hid_dim_type": null,
|
36 |
+
"flip_sin_to_cos": true,
|
37 |
+
"freq_shift": 0,
|
38 |
+
"in_channels": 4,
|
39 |
+
"inv_gamma": 1.0,
|
40 |
+
"layers_per_block": 2,
|
41 |
+
"mid_block_only_cross_attention": null,
|
42 |
+
"mid_block_scale_factor": 1,
|
43 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
44 |
+
"min_decay": 0.0,
|
45 |
+
"norm_eps": 1e-05,
|
46 |
+
"norm_num_groups": 32,
|
47 |
+
"num_attention_heads": null,
|
48 |
+
"num_class_embeds": null,
|
49 |
+
"only_cross_attention": false,
|
50 |
+
"optimization_step": 1000,
|
51 |
+
"out_channels": 4,
|
52 |
+
"power": 0.6666666666666666,
|
53 |
+
"projection_class_embeddings_input_dim": null,
|
54 |
+
"resnet_out_scale_factor": 1.0,
|
55 |
+
"resnet_skip_time_act": false,
|
56 |
+
"resnet_time_scale_shift": "default",
|
57 |
+
"reverse_transformer_layers_per_block": null,
|
58 |
+
"sample_size": 64,
|
59 |
+
"time_cond_proj_dim": null,
|
60 |
+
"time_embedding_act_fn": null,
|
61 |
+
"time_embedding_dim": null,
|
62 |
+
"time_embedding_type": "positional",
|
63 |
+
"timestep_post_act": null,
|
64 |
+
"transformer_layers_per_block": 1,
|
65 |
+
"up_block_types": [
|
66 |
+
"UpBlock2D",
|
67 |
+
"CrossAttnUpBlock2D",
|
68 |
+
"CrossAttnUpBlock2D",
|
69 |
+
"CrossAttnUpBlock2D"
|
70 |
+
],
|
71 |
+
"upcast_attention": false,
|
72 |
+
"update_after_step": 0,
|
73 |
+
"use_ema_warmup": false,
|
74 |
+
"use_linear_projection": false
|
75 |
+
}
|
checkpoint-1000/unet_ema/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a2a9465242c34d3efc3e598e6191850a632061989bef857bcfd316ee3ebc06a9
|
3 |
+
size 3438167536
|
checkpoint-1500/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:de4cb2e1545da9ed27e8d1cc72a8996d548efc2c27356cecbce5fb096f8003ac
|
3 |
+
size 6876750164
|
checkpoint-1500/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ad267be53cffce66ea1aeb2161fd19c5de542af7ee118ada95b382111dc8230
|
3 |
+
size 14344
|
checkpoint-1500/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c2a4b6e0ef05ca249f2d6b7f9f3ad1dff81e55842a962df795bb9740c17c8e92
|
3 |
+
size 988
|
checkpoint-1500/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ba898b1f3e5e45d9f3bb45a6842d5311c6a3eb3ad1dbec65e3d786213a1ad8ec
|
3 |
+
size 1000
|
checkpoint-1500/unet/config.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"down_block_types": [
|
25 |
+
"CrossAttnDownBlock2D",
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"DownBlock2D"
|
29 |
+
],
|
30 |
+
"downsample_padding": 1,
|
31 |
+
"dropout": 0.0,
|
32 |
+
"dual_cross_attention": false,
|
33 |
+
"encoder_hid_dim": null,
|
34 |
+
"encoder_hid_dim_type": null,
|
35 |
+
"flip_sin_to_cos": true,
|
36 |
+
"freq_shift": 0,
|
37 |
+
"in_channels": 4,
|
38 |
+
"layers_per_block": 2,
|
39 |
+
"mid_block_only_cross_attention": null,
|
40 |
+
"mid_block_scale_factor": 1,
|
41 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
42 |
+
"norm_eps": 1e-05,
|
43 |
+
"norm_num_groups": 32,
|
44 |
+
"num_attention_heads": null,
|
45 |
+
"num_class_embeds": null,
|
46 |
+
"only_cross_attention": false,
|
47 |
+
"out_channels": 4,
|
48 |
+
"projection_class_embeddings_input_dim": null,
|
49 |
+
"resnet_out_scale_factor": 1.0,
|
50 |
+
"resnet_skip_time_act": false,
|
51 |
+
"resnet_time_scale_shift": "default",
|
52 |
+
"reverse_transformer_layers_per_block": null,
|
53 |
+
"sample_size": 64,
|
54 |
+
"time_cond_proj_dim": null,
|
55 |
+
"time_embedding_act_fn": null,
|
56 |
+
"time_embedding_dim": null,
|
57 |
+
"time_embedding_type": "positional",
|
58 |
+
"timestep_post_act": null,
|
59 |
+
"transformer_layers_per_block": 1,
|
60 |
+
"up_block_types": [
|
61 |
+
"UpBlock2D",
|
62 |
+
"CrossAttnUpBlock2D",
|
63 |
+
"CrossAttnUpBlock2D",
|
64 |
+
"CrossAttnUpBlock2D"
|
65 |
+
],
|
66 |
+
"upcast_attention": false,
|
67 |
+
"use_linear_projection": false
|
68 |
+
}
|
checkpoint-1500/unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ced3c75b62df2d560c6e8633f6433b693e44b6098d590a61341672afb5a3999e
|
3 |
+
size 3438167536
|
checkpoint-1500/unet_ema/config.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"decay": 0.9999,
|
25 |
+
"down_block_types": [
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"CrossAttnDownBlock2D",
|
29 |
+
"DownBlock2D"
|
30 |
+
],
|
31 |
+
"downsample_padding": 1,
|
32 |
+
"dropout": 0.0,
|
33 |
+
"dual_cross_attention": false,
|
34 |
+
"encoder_hid_dim": null,
|
35 |
+
"encoder_hid_dim_type": null,
|
36 |
+
"flip_sin_to_cos": true,
|
37 |
+
"freq_shift": 0,
|
38 |
+
"in_channels": 4,
|
39 |
+
"inv_gamma": 1.0,
|
40 |
+
"layers_per_block": 2,
|
41 |
+
"mid_block_only_cross_attention": null,
|
42 |
+
"mid_block_scale_factor": 1,
|
43 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
44 |
+
"min_decay": 0.0,
|
45 |
+
"norm_eps": 1e-05,
|
46 |
+
"norm_num_groups": 32,
|
47 |
+
"num_attention_heads": null,
|
48 |
+
"num_class_embeds": null,
|
49 |
+
"only_cross_attention": false,
|
50 |
+
"optimization_step": 1500,
|
51 |
+
"out_channels": 4,
|
52 |
+
"power": 0.6666666666666666,
|
53 |
+
"projection_class_embeddings_input_dim": null,
|
54 |
+
"resnet_out_scale_factor": 1.0,
|
55 |
+
"resnet_skip_time_act": false,
|
56 |
+
"resnet_time_scale_shift": "default",
|
57 |
+
"reverse_transformer_layers_per_block": null,
|
58 |
+
"sample_size": 64,
|
59 |
+
"time_cond_proj_dim": null,
|
60 |
+
"time_embedding_act_fn": null,
|
61 |
+
"time_embedding_dim": null,
|
62 |
+
"time_embedding_type": "positional",
|
63 |
+
"timestep_post_act": null,
|
64 |
+
"transformer_layers_per_block": 1,
|
65 |
+
"up_block_types": [
|
66 |
+
"UpBlock2D",
|
67 |
+
"CrossAttnUpBlock2D",
|
68 |
+
"CrossAttnUpBlock2D",
|
69 |
+
"CrossAttnUpBlock2D"
|
70 |
+
],
|
71 |
+
"upcast_attention": false,
|
72 |
+
"update_after_step": 0,
|
73 |
+
"use_ema_warmup": false,
|
74 |
+
"use_linear_projection": false
|
75 |
+
}
|
checkpoint-1500/unet_ema/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d1a4705123dcbc9f34687eb9003926377bcb54b67808f975813a2fe53e2cb900
|
3 |
+
size 3438167536
|
checkpoint-2000/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:065d82fbbc4414f705767729df2489e9ba45f81b3dba732d15a977b20400877d
|
3 |
+
size 6876750164
|
checkpoint-2000/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:19767e25d9233a793daf2b094925a6e152bfe01b652b8ccc0be85f9160f75a5f
|
3 |
+
size 14344
|
checkpoint-2000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c50a9cebe5d66d453d25b140738bff479749ac03e0a43597d8776bc22f6ed0c
|
3 |
+
size 988
|
checkpoint-2000/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4ec1eff211b49fcc3bc0826c9b848f15ac65692932719b83e4f2c5ed5f4d098
|
3 |
+
size 1000
|
checkpoint-2000/unet/config.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"down_block_types": [
|
25 |
+
"CrossAttnDownBlock2D",
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"DownBlock2D"
|
29 |
+
],
|
30 |
+
"downsample_padding": 1,
|
31 |
+
"dropout": 0.0,
|
32 |
+
"dual_cross_attention": false,
|
33 |
+
"encoder_hid_dim": null,
|
34 |
+
"encoder_hid_dim_type": null,
|
35 |
+
"flip_sin_to_cos": true,
|
36 |
+
"freq_shift": 0,
|
37 |
+
"in_channels": 4,
|
38 |
+
"layers_per_block": 2,
|
39 |
+
"mid_block_only_cross_attention": null,
|
40 |
+
"mid_block_scale_factor": 1,
|
41 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
42 |
+
"norm_eps": 1e-05,
|
43 |
+
"norm_num_groups": 32,
|
44 |
+
"num_attention_heads": null,
|
45 |
+
"num_class_embeds": null,
|
46 |
+
"only_cross_attention": false,
|
47 |
+
"out_channels": 4,
|
48 |
+
"projection_class_embeddings_input_dim": null,
|
49 |
+
"resnet_out_scale_factor": 1.0,
|
50 |
+
"resnet_skip_time_act": false,
|
51 |
+
"resnet_time_scale_shift": "default",
|
52 |
+
"reverse_transformer_layers_per_block": null,
|
53 |
+
"sample_size": 64,
|
54 |
+
"time_cond_proj_dim": null,
|
55 |
+
"time_embedding_act_fn": null,
|
56 |
+
"time_embedding_dim": null,
|
57 |
+
"time_embedding_type": "positional",
|
58 |
+
"timestep_post_act": null,
|
59 |
+
"transformer_layers_per_block": 1,
|
60 |
+
"up_block_types": [
|
61 |
+
"UpBlock2D",
|
62 |
+
"CrossAttnUpBlock2D",
|
63 |
+
"CrossAttnUpBlock2D",
|
64 |
+
"CrossAttnUpBlock2D"
|
65 |
+
],
|
66 |
+
"upcast_attention": false,
|
67 |
+
"use_linear_projection": false
|
68 |
+
}
|
checkpoint-2000/unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:434f9e3352c835583ab63269ce6259fef3293666cc80225d1a153611856db1ec
|
3 |
+
size 3438167536
|
checkpoint-2000/unet_ema/config.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"decay": 0.9999,
|
25 |
+
"down_block_types": [
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"CrossAttnDownBlock2D",
|
29 |
+
"DownBlock2D"
|
30 |
+
],
|
31 |
+
"downsample_padding": 1,
|
32 |
+
"dropout": 0.0,
|
33 |
+
"dual_cross_attention": false,
|
34 |
+
"encoder_hid_dim": null,
|
35 |
+
"encoder_hid_dim_type": null,
|
36 |
+
"flip_sin_to_cos": true,
|
37 |
+
"freq_shift": 0,
|
38 |
+
"in_channels": 4,
|
39 |
+
"inv_gamma": 1.0,
|
40 |
+
"layers_per_block": 2,
|
41 |
+
"mid_block_only_cross_attention": null,
|
42 |
+
"mid_block_scale_factor": 1,
|
43 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
44 |
+
"min_decay": 0.0,
|
45 |
+
"norm_eps": 1e-05,
|
46 |
+
"norm_num_groups": 32,
|
47 |
+
"num_attention_heads": null,
|
48 |
+
"num_class_embeds": null,
|
49 |
+
"only_cross_attention": false,
|
50 |
+
"optimization_step": 2000,
|
51 |
+
"out_channels": 4,
|
52 |
+
"power": 0.6666666666666666,
|
53 |
+
"projection_class_embeddings_input_dim": null,
|
54 |
+
"resnet_out_scale_factor": 1.0,
|
55 |
+
"resnet_skip_time_act": false,
|
56 |
+
"resnet_time_scale_shift": "default",
|
57 |
+
"reverse_transformer_layers_per_block": null,
|
58 |
+
"sample_size": 64,
|
59 |
+
"time_cond_proj_dim": null,
|
60 |
+
"time_embedding_act_fn": null,
|
61 |
+
"time_embedding_dim": null,
|
62 |
+
"time_embedding_type": "positional",
|
63 |
+
"timestep_post_act": null,
|
64 |
+
"transformer_layers_per_block": 1,
|
65 |
+
"up_block_types": [
|
66 |
+
"UpBlock2D",
|
67 |
+
"CrossAttnUpBlock2D",
|
68 |
+
"CrossAttnUpBlock2D",
|
69 |
+
"CrossAttnUpBlock2D"
|
70 |
+
],
|
71 |
+
"upcast_attention": false,
|
72 |
+
"update_after_step": 0,
|
73 |
+
"use_ema_warmup": false,
|
74 |
+
"use_linear_projection": false
|
75 |
+
}
|
checkpoint-2000/unet_ema/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3f0ef10763e9305712285d62c8929c94b30004dc929c6bee7c1b4186e1d0337d
|
3 |
+
size 3438167536
|
checkpoint-2500/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee668a0a89c56c32ecec9250994e69869eed34b9363fa1a77781b7e8fd2aca86
|
3 |
+
size 6876750164
|
checkpoint-2500/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65bfb2775f9f700d7139f05c7aafb6cef63c27c8e2c05b9d1af3a7e91ab37558
|
3 |
+
size 14344
|
checkpoint-2500/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48e2d97f563bb838328076a1666504681962151a3975a2f064be3a03e6500740
|
3 |
+
size 988
|
checkpoint-2500/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9fd48d95ba81ff009e941f14c1085b2911752a64759f8b8e1b500163b678d3a3
|
3 |
+
size 1000
|
checkpoint-2500/unet/config.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"down_block_types": [
|
25 |
+
"CrossAttnDownBlock2D",
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"DownBlock2D"
|
29 |
+
],
|
30 |
+
"downsample_padding": 1,
|
31 |
+
"dropout": 0.0,
|
32 |
+
"dual_cross_attention": false,
|
33 |
+
"encoder_hid_dim": null,
|
34 |
+
"encoder_hid_dim_type": null,
|
35 |
+
"flip_sin_to_cos": true,
|
36 |
+
"freq_shift": 0,
|
37 |
+
"in_channels": 4,
|
38 |
+
"layers_per_block": 2,
|
39 |
+
"mid_block_only_cross_attention": null,
|
40 |
+
"mid_block_scale_factor": 1,
|
41 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
42 |
+
"norm_eps": 1e-05,
|
43 |
+
"norm_num_groups": 32,
|
44 |
+
"num_attention_heads": null,
|
45 |
+
"num_class_embeds": null,
|
46 |
+
"only_cross_attention": false,
|
47 |
+
"out_channels": 4,
|
48 |
+
"projection_class_embeddings_input_dim": null,
|
49 |
+
"resnet_out_scale_factor": 1.0,
|
50 |
+
"resnet_skip_time_act": false,
|
51 |
+
"resnet_time_scale_shift": "default",
|
52 |
+
"reverse_transformer_layers_per_block": null,
|
53 |
+
"sample_size": 64,
|
54 |
+
"time_cond_proj_dim": null,
|
55 |
+
"time_embedding_act_fn": null,
|
56 |
+
"time_embedding_dim": null,
|
57 |
+
"time_embedding_type": "positional",
|
58 |
+
"timestep_post_act": null,
|
59 |
+
"transformer_layers_per_block": 1,
|
60 |
+
"up_block_types": [
|
61 |
+
"UpBlock2D",
|
62 |
+
"CrossAttnUpBlock2D",
|
63 |
+
"CrossAttnUpBlock2D",
|
64 |
+
"CrossAttnUpBlock2D"
|
65 |
+
],
|
66 |
+
"upcast_attention": false,
|
67 |
+
"use_linear_projection": false
|
68 |
+
}
|
checkpoint-2500/unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94197d9fd800645e5e367e1326160fc15f77427d7a1901a47960de0f71e2bb4c
|
3 |
+
size 3438167536
|
checkpoint-2500/unet_ema/config.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"decay": 0.9999,
|
25 |
+
"down_block_types": [
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"CrossAttnDownBlock2D",
|
29 |
+
"DownBlock2D"
|
30 |
+
],
|
31 |
+
"downsample_padding": 1,
|
32 |
+
"dropout": 0.0,
|
33 |
+
"dual_cross_attention": false,
|
34 |
+
"encoder_hid_dim": null,
|
35 |
+
"encoder_hid_dim_type": null,
|
36 |
+
"flip_sin_to_cos": true,
|
37 |
+
"freq_shift": 0,
|
38 |
+
"in_channels": 4,
|
39 |
+
"inv_gamma": 1.0,
|
40 |
+
"layers_per_block": 2,
|
41 |
+
"mid_block_only_cross_attention": null,
|
42 |
+
"mid_block_scale_factor": 1,
|
43 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
44 |
+
"min_decay": 0.0,
|
45 |
+
"norm_eps": 1e-05,
|
46 |
+
"norm_num_groups": 32,
|
47 |
+
"num_attention_heads": null,
|
48 |
+
"num_class_embeds": null,
|
49 |
+
"only_cross_attention": false,
|
50 |
+
"optimization_step": 2500,
|
51 |
+
"out_channels": 4,
|
52 |
+
"power": 0.6666666666666666,
|
53 |
+
"projection_class_embeddings_input_dim": null,
|
54 |
+
"resnet_out_scale_factor": 1.0,
|
55 |
+
"resnet_skip_time_act": false,
|
56 |
+
"resnet_time_scale_shift": "default",
|
57 |
+
"reverse_transformer_layers_per_block": null,
|
58 |
+
"sample_size": 64,
|
59 |
+
"time_cond_proj_dim": null,
|
60 |
+
"time_embedding_act_fn": null,
|
61 |
+
"time_embedding_dim": null,
|
62 |
+
"time_embedding_type": "positional",
|
63 |
+
"timestep_post_act": null,
|
64 |
+
"transformer_layers_per_block": 1,
|
65 |
+
"up_block_types": [
|
66 |
+
"UpBlock2D",
|
67 |
+
"CrossAttnUpBlock2D",
|
68 |
+
"CrossAttnUpBlock2D",
|
69 |
+
"CrossAttnUpBlock2D"
|
70 |
+
],
|
71 |
+
"upcast_attention": false,
|
72 |
+
"update_after_step": 0,
|
73 |
+
"use_ema_warmup": false,
|
74 |
+
"use_linear_projection": false
|
75 |
+
}
|
checkpoint-2500/unet_ema/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f2fa6d0f3a0dd2f40b1ea9010a1fd9b8ade345a8fbff2a6c02547dab169e3470
|
3 |
+
size 3438167536
|
checkpoint-500/optimizer.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abe7862d6d1f522edeea9ffc7fc83583de9a04dadc97e69e95499de1f1fb502f
|
3 |
+
size 6876750164
|
checkpoint-500/random_states_0.pkl
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9f0f8f95cab4faed1d01cadd09b460d838ef7d228a4d77e740dc38573d230fb6
|
3 |
+
size 14344
|
checkpoint-500/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:18b984273ea2d45b7ffb1d047bb359d93111e41fcad70d16a1b453fd38f72636
|
3 |
+
size 988
|
checkpoint-500/scheduler.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9aabe32b674b8d907f94f2c44b0c93926d9cfb37065ef79af75d1bed0ae0cb70
|
3 |
+
size 1000
|
checkpoint-500/unet/config.json
ADDED
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"down_block_types": [
|
25 |
+
"CrossAttnDownBlock2D",
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"DownBlock2D"
|
29 |
+
],
|
30 |
+
"downsample_padding": 1,
|
31 |
+
"dropout": 0.0,
|
32 |
+
"dual_cross_attention": false,
|
33 |
+
"encoder_hid_dim": null,
|
34 |
+
"encoder_hid_dim_type": null,
|
35 |
+
"flip_sin_to_cos": true,
|
36 |
+
"freq_shift": 0,
|
37 |
+
"in_channels": 4,
|
38 |
+
"layers_per_block": 2,
|
39 |
+
"mid_block_only_cross_attention": null,
|
40 |
+
"mid_block_scale_factor": 1,
|
41 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
42 |
+
"norm_eps": 1e-05,
|
43 |
+
"norm_num_groups": 32,
|
44 |
+
"num_attention_heads": null,
|
45 |
+
"num_class_embeds": null,
|
46 |
+
"only_cross_attention": false,
|
47 |
+
"out_channels": 4,
|
48 |
+
"projection_class_embeddings_input_dim": null,
|
49 |
+
"resnet_out_scale_factor": 1.0,
|
50 |
+
"resnet_skip_time_act": false,
|
51 |
+
"resnet_time_scale_shift": "default",
|
52 |
+
"reverse_transformer_layers_per_block": null,
|
53 |
+
"sample_size": 64,
|
54 |
+
"time_cond_proj_dim": null,
|
55 |
+
"time_embedding_act_fn": null,
|
56 |
+
"time_embedding_dim": null,
|
57 |
+
"time_embedding_type": "positional",
|
58 |
+
"timestep_post_act": null,
|
59 |
+
"transformer_layers_per_block": 1,
|
60 |
+
"up_block_types": [
|
61 |
+
"UpBlock2D",
|
62 |
+
"CrossAttnUpBlock2D",
|
63 |
+
"CrossAttnUpBlock2D",
|
64 |
+
"CrossAttnUpBlock2D"
|
65 |
+
],
|
66 |
+
"upcast_attention": false,
|
67 |
+
"use_linear_projection": false
|
68 |
+
}
|
checkpoint-500/unet/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95b600fbd68982a020e41b7259042afc9a89b4839fcc279f3cfb84df62cc7dda
|
3 |
+
size 3438167536
|
checkpoint-500/unet_ema/config.json
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "UNet2DConditionModel",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"act_fn": "silu",
|
6 |
+
"addition_embed_type": null,
|
7 |
+
"addition_embed_type_num_heads": 64,
|
8 |
+
"addition_time_embed_dim": null,
|
9 |
+
"attention_head_dim": 8,
|
10 |
+
"attention_type": "default",
|
11 |
+
"block_out_channels": [
|
12 |
+
320,
|
13 |
+
640,
|
14 |
+
1280,
|
15 |
+
1280
|
16 |
+
],
|
17 |
+
"center_input_sample": false,
|
18 |
+
"class_embed_type": null,
|
19 |
+
"class_embeddings_concat": false,
|
20 |
+
"conv_in_kernel": 3,
|
21 |
+
"conv_out_kernel": 3,
|
22 |
+
"cross_attention_dim": 768,
|
23 |
+
"cross_attention_norm": null,
|
24 |
+
"decay": 0.9999,
|
25 |
+
"down_block_types": [
|
26 |
+
"CrossAttnDownBlock2D",
|
27 |
+
"CrossAttnDownBlock2D",
|
28 |
+
"CrossAttnDownBlock2D",
|
29 |
+
"DownBlock2D"
|
30 |
+
],
|
31 |
+
"downsample_padding": 1,
|
32 |
+
"dropout": 0.0,
|
33 |
+
"dual_cross_attention": false,
|
34 |
+
"encoder_hid_dim": null,
|
35 |
+
"encoder_hid_dim_type": null,
|
36 |
+
"flip_sin_to_cos": true,
|
37 |
+
"freq_shift": 0,
|
38 |
+
"in_channels": 4,
|
39 |
+
"inv_gamma": 1.0,
|
40 |
+
"layers_per_block": 2,
|
41 |
+
"mid_block_only_cross_attention": null,
|
42 |
+
"mid_block_scale_factor": 1,
|
43 |
+
"mid_block_type": "UNetMidBlock2DCrossAttn",
|
44 |
+
"min_decay": 0.0,
|
45 |
+
"norm_eps": 1e-05,
|
46 |
+
"norm_num_groups": 32,
|
47 |
+
"num_attention_heads": null,
|
48 |
+
"num_class_embeds": null,
|
49 |
+
"only_cross_attention": false,
|
50 |
+
"optimization_step": 500,
|
51 |
+
"out_channels": 4,
|
52 |
+
"power": 0.6666666666666666,
|
53 |
+
"projection_class_embeddings_input_dim": null,
|
54 |
+
"resnet_out_scale_factor": 1.0,
|
55 |
+
"resnet_skip_time_act": false,
|
56 |
+
"resnet_time_scale_shift": "default",
|
57 |
+
"reverse_transformer_layers_per_block": null,
|
58 |
+
"sample_size": 64,
|
59 |
+
"time_cond_proj_dim": null,
|
60 |
+
"time_embedding_act_fn": null,
|
61 |
+
"time_embedding_dim": null,
|
62 |
+
"time_embedding_type": "positional",
|
63 |
+
"timestep_post_act": null,
|
64 |
+
"transformer_layers_per_block": 1,
|
65 |
+
"up_block_types": [
|
66 |
+
"UpBlock2D",
|
67 |
+
"CrossAttnUpBlock2D",
|
68 |
+
"CrossAttnUpBlock2D",
|
69 |
+
"CrossAttnUpBlock2D"
|
70 |
+
],
|
71 |
+
"upcast_attention": false,
|
72 |
+
"update_after_step": 0,
|
73 |
+
"use_ema_warmup": false,
|
74 |
+
"use_linear_projection": false
|
75 |
+
}
|
checkpoint-500/unet_ema/diffusion_pytorch_model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5f22979cd24c02f91b977a6ee9e3a9914cb4ee977c3f6f6ea51c06ff9ac226e
|
3 |
+
size 3438167536
|
feature_extractor/preprocessor_config.json
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"crop_size": {
|
3 |
+
"height": 224,
|
4 |
+
"width": 224
|
5 |
+
},
|
6 |
+
"do_center_crop": true,
|
7 |
+
"do_convert_rgb": true,
|
8 |
+
"do_normalize": true,
|
9 |
+
"do_rescale": true,
|
10 |
+
"do_resize": true,
|
11 |
+
"image_mean": [
|
12 |
+
0.48145466,
|
13 |
+
0.4578275,
|
14 |
+
0.40821073
|
15 |
+
],
|
16 |
+
"image_processor_type": "CLIPImageProcessor",
|
17 |
+
"image_std": [
|
18 |
+
0.26862954,
|
19 |
+
0.26130258,
|
20 |
+
0.27577711
|
21 |
+
],
|
22 |
+
"resample": 3,
|
23 |
+
"rescale_factor": 0.00392156862745098,
|
24 |
+
"size": {
|
25 |
+
"shortest_edge": 224
|
26 |
+
}
|
27 |
+
}
|
logs/text2image-fine-tune/1713384066.8972213/events.out.tfevents.1713384066.880f55e348cb.9865.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f89402eb13c324dd967e39e44b07d06fab78686272901f19de6529a04e019b48
|
3 |
+
size 2262
|
logs/text2image-fine-tune/1713384066.8989856/hparams.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
adam_beta1: 0.9
|
2 |
+
adam_beta2: 0.999
|
3 |
+
adam_epsilon: 1.0e-08
|
4 |
+
adam_weight_decay: 0.01
|
5 |
+
allow_tf32: false
|
6 |
+
cache_dir: null
|
7 |
+
caption_column: text
|
8 |
+
center_crop: true
|
9 |
+
checkpointing_steps: 500
|
10 |
+
checkpoints_total_limit: null
|
11 |
+
dataloader_num_workers: 0
|
12 |
+
dataset_config_name: null
|
13 |
+
dataset_name: BlackSadadou/STBlackIMGgen
|
14 |
+
enable_xformers_memory_efficient_attention: false
|
15 |
+
gradient_accumulation_steps: 4
|
16 |
+
gradient_checkpointing: true
|
17 |
+
hub_model_id: null
|
18 |
+
hub_token: null
|
19 |
+
image_column: image
|
20 |
+
input_perturbation: 0
|
21 |
+
learning_rate: 1.0e-05
|
22 |
+
local_rank: -1
|
23 |
+
logging_dir: logs
|
24 |
+
lr_scheduler: linear
|
25 |
+
lr_warmup_steps: 100
|
26 |
+
max_grad_norm: 1.0
|
27 |
+
max_train_samples: null
|
28 |
+
max_train_steps: 2500
|
29 |
+
mixed_precision: fp16
|
30 |
+
noise_offset: 0
|
31 |
+
non_ema_revision: null
|
32 |
+
num_train_epochs: 417
|
33 |
+
output_dir: SD-Black-IMGgen-Model-v2
|
34 |
+
prediction_type: null
|
35 |
+
pretrained_model_name_or_path: CompVis/stable-diffusion-v1-4
|
36 |
+
push_to_hub: true
|
37 |
+
random_flip: true
|
38 |
+
report_to: tensorboard
|
39 |
+
resolution: 512
|
40 |
+
resume_from_checkpoint: null
|
41 |
+
revision: null
|
42 |
+
scale_lr: false
|
43 |
+
seed: null
|
44 |
+
snr_gamma: null
|
45 |
+
tracker_project_name: text2image-fine-tune
|
46 |
+
train_batch_size: 1
|
47 |
+
train_data_dir: null
|
48 |
+
use_8bit_adam: false
|
49 |
+
use_ema: true
|
50 |
+
validation_epochs: 5
|
51 |
+
variant: null
|
logs/text2image-fine-tune/events.out.tfevents.1713384066.880f55e348cb.9865.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eccc0268ecfe7191de7c08dd9ccd19439a970c8aef8e069262bf4ad4a6d0013d
|
3 |
+
size 30436974
|
model_index.json
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "StableDiffusionPipeline",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
5 |
+
"feature_extractor": [
|
6 |
+
"transformers",
|
7 |
+
"CLIPImageProcessor"
|
8 |
+
],
|
9 |
+
"image_encoder": [
|
10 |
+
null,
|
11 |
+
null
|
12 |
+
],
|
13 |
+
"requires_safety_checker": true,
|
14 |
+
"safety_checker": [
|
15 |
+
"stable_diffusion",
|
16 |
+
"StableDiffusionSafetyChecker"
|
17 |
+
],
|
18 |
+
"scheduler": [
|
19 |
+
"diffusers",
|
20 |
+
"PNDMScheduler"
|
21 |
+
],
|
22 |
+
"text_encoder": [
|
23 |
+
"transformers",
|
24 |
+
"CLIPTextModel"
|
25 |
+
],
|
26 |
+
"tokenizer": [
|
27 |
+
"transformers",
|
28 |
+
"CLIPTokenizer"
|
29 |
+
],
|
30 |
+
"unet": [
|
31 |
+
"diffusers",
|
32 |
+
"UNet2DConditionModel"
|
33 |
+
],
|
34 |
+
"vae": [
|
35 |
+
"diffusers",
|
36 |
+
"AutoencoderKL"
|
37 |
+
]
|
38 |
+
}
|
safety_checker/config.json
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/root/.cache/huggingface/hub/models--CompVis--stable-diffusion-v1-4/snapshots/133a221b8aa7292a167afc5127cb63fb5005638b/safety_checker",
|
3 |
+
"architectures": [
|
4 |
+
"StableDiffusionSafetyChecker"
|
5 |
+
],
|
6 |
+
"initializer_factor": 1.0,
|
7 |
+
"logit_scale_init_value": 2.6592,
|
8 |
+
"model_type": "clip",
|
9 |
+
"projection_dim": 768,
|
10 |
+
"text_config": {
|
11 |
+
"dropout": 0.0,
|
12 |
+
"hidden_size": 768,
|
13 |
+
"intermediate_size": 3072,
|
14 |
+
"model_type": "clip_text_model",
|
15 |
+
"num_attention_heads": 12
|
16 |
+
},
|
17 |
+
"torch_dtype": "float32",
|
18 |
+
"transformers_version": "4.38.2",
|
19 |
+
"vision_config": {
|
20 |
+
"dropout": 0.0,
|
21 |
+
"hidden_size": 1024,
|
22 |
+
"intermediate_size": 4096,
|
23 |
+
"model_type": "clip_vision_model",
|
24 |
+
"num_attention_heads": 16,
|
25 |
+
"num_hidden_layers": 24,
|
26 |
+
"patch_size": 14
|
27 |
+
}
|
28 |
+
}
|
safety_checker/model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fb351a5ded815c3ff744968ad9c6b218d071b9d313d04f35e813b84b4c0ffde8
|
3 |
+
size 1215979664
|
scheduler/scheduler_config.json
ADDED
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_class_name": "PNDMScheduler",
|
3 |
+
"_diffusers_version": "0.28.0.dev0",
|
4 |
+
"beta_end": 0.012,
|
5 |
+
"beta_schedule": "scaled_linear",
|
6 |
+
"beta_start": 0.00085,
|
7 |
+
"clip_sample": false,
|
8 |
+
"num_train_timesteps": 1000,
|
9 |
+
"prediction_type": "epsilon",
|
10 |
+
"set_alpha_to_one": false,
|
11 |
+
"skip_prk_steps": true,
|
12 |
+
"steps_offset": 1,
|
13 |
+
"timestep_spacing": "leading",
|
14 |
+
"trained_betas": null
|
15 |
+
}
|
text_encoder/config.json
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "CompVis/stable-diffusion-v1-4",
|
3 |
+
"architectures": [
|
4 |
+
"CLIPTextModel"
|
5 |
+
],
|
6 |
+
"attention_dropout": 0.0,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"dropout": 0.0,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "quick_gelu",
|
11 |
+
"hidden_size": 768,
|
12 |
+
"initializer_factor": 1.0,
|
13 |
+
"initializer_range": 0.02,
|
14 |
+
"intermediate_size": 3072,
|
15 |
+
"layer_norm_eps": 1e-05,
|
16 |
+
"max_position_embeddings": 77,
|
17 |
+
"model_type": "clip_text_model",
|
18 |
+
"num_attention_heads": 12,
|
19 |
+
"num_hidden_layers": 12,
|
20 |
+
"pad_token_id": 1,
|
21 |
+
"projection_dim": 512,
|
22 |
+
"torch_dtype": "float16",
|
23 |
+
"transformers_version": "4.38.2",
|
24 |
+
"vocab_size": 49408
|
25 |
+
}
|