HighCWu committed on
Commit
ac22bf9
1 Parent(s): 06e068b

init commits

Browse files
README.md CHANGED
@@ -1,3 +1,38 @@
1
  ---
2
- license: openrail
 
 
 
 
 
 
 
 
 
 
3
  ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ license: creativeml-openrail-m
3
+ base_model: runwayml/stable-diffusion-v1-5
4
+ tags:
5
+ - stable-diffusion
6
+ - stable-diffusion-diffusers
7
+ - text-to-image
8
+ - diffusers
9
+ - lora
10
+ - controlnet
11
+ - control-lora
12
+ inference: true
13
  ---
14
+ # ControlLoRA text2image fine-tuning - Official Repository
15
+ These are ControlLoRA adaptation weights for [runwayml/stable-diffusion-v1-5](https://huggingface.co/runwayml/stable-diffusion-v1-5).
16
+
17
+ With ControlLoRA, a simple and small (~7M parameters, ~25 MB of storage) network, you can control the spatial information of Stable Diffusion.
18
+
19
+ Each of the weights is fine-tuned on the [fill50k](https://huggingface.co/datasets/HighCWu/fill50k), [diffusiondb_2m_first_5k_canny](https://huggingface.co/datasets/HighCWu/diffusiondb_2m_first_5k_canny) and [mpii_100_openpose](https://huggingface.co/datasets/HighCWu/mpii_100_openpose) datasets for 30k steps on an RTX 4080 in 3 hours.
20
+
21
+ Some example images are shown below.
22
+
23
+ ![Example image 1](./docs/imgs/validation_25100_fe0b49075b45efdc311c.png)
24
+ *boys are playing with a frisbee in a field,
25
+ 2009 cinematography, trending on artforum, running pose,
26
+ bruce springsteen, connected to heart machines, with tattoos,
27
+ beautiful - n 9, by Eric Dinyer, young child, midlands*
28
+
29
+ ![Example image 2](./docs/imgs/validation_25000_6b3f8b37f2268ec0c09e.png)
30
+ *portrait of a dancing eagle woman,
31
+ beautiful blonde haired lakota sioux goddess,
32
+ intricate, highly detailed art by james jean,
33
+ ray tracing, digital painting, artstation,
34
+ concept art, smooth, sharp focus, illustration,
35
+ artgerm and greg rutkowski and alphonse mucha,
36
+ vladimir kush, giger, roger dean, 8 k*
37
+
38
+ I also uploaded a LoRA model fine-tuned on my selfies for 2k steps, which can be used in experiments that mix LoRA and ControlLoRA.
docs/imgs/validation_25000_6b3f8b37f2268ec0c09e.png ADDED
docs/imgs/validation_25100_fe0b49075b45efdc311c.png ADDED
sd-diffusiondb-canny-model-control-lora/config.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "ControlLoRA",
3
+ "_diffusers_version": "0.13.0.dev0",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 32,
7
+ 64,
8
+ 128,
9
+ 256
10
+ ],
11
+ "down_block_types": [
12
+ "SimpleDownEncoderBlock2D",
13
+ "SimpleDownEncoderBlock2D",
14
+ "SimpleDownEncoderBlock2D",
15
+ "SimpleDownEncoderBlock2D"
16
+ ],
17
+ "in_channels": 3,
18
+ "layers_per_block": 1,
19
+ "lora_block_in_channels": [
20
+ 256,
21
+ 256,
22
+ 256,
23
+ 256
24
+ ],
25
+ "lora_block_out_channels": [
26
+ 320,
27
+ 640,
28
+ 1280,
29
+ 1280
30
+ ],
31
+ "lora_control_rank": null,
32
+ "lora_cross_attention_dims": [
33
+ [
34
+ null,
35
+ 768,
36
+ null,
37
+ 768,
38
+ null,
39
+ 768,
40
+ null,
41
+ 768,
42
+ null,
43
+ 768
44
+ ],
45
+ [
46
+ null,
47
+ 768,
48
+ null,
49
+ 768,
50
+ null,
51
+ 768,
52
+ null,
53
+ 768,
54
+ null,
55
+ 768
56
+ ],
57
+ [
58
+ null,
59
+ 768,
60
+ null,
61
+ 768,
62
+ null,
63
+ 768,
64
+ null,
65
+ 768,
66
+ null,
67
+ 768
68
+ ],
69
+ [
70
+ null,
71
+ 768
72
+ ]
73
+ ],
74
+ "lora_post_add": false,
75
+ "lora_pre_conv_layers_kernel_size": 1,
76
+ "lora_pre_conv_layers_per_block": 1,
77
+ "lora_pre_conv_types": [
78
+ "SimpleDownEncoderBlock2D",
79
+ "SimpleDownEncoderBlock2D",
80
+ "SimpleDownEncoderBlock2D",
81
+ "SimpleDownEncoderBlock2D"
82
+ ],
83
+ "lora_pre_down_block_types": [
84
+ null,
85
+ "SimpleDownEncoderBlock2D",
86
+ "SimpleDownEncoderBlock2D",
87
+ "SimpleDownEncoderBlock2D"
88
+ ],
89
+ "lora_pre_down_layers_per_block": 1,
90
+ "lora_rank": 4,
91
+ "norm_num_groups": 32
92
+ }
sd-diffusiondb-canny-model-control-lora/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1c192f779d5f85db2fa00fb2a6277c2605ad3c9066e098b0ff7e139c8d713dd
3
+ size 24351137
sd-diffusiondb-canny-model-control-lora/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d908c223e25342362765bcc0f1ae94a20e14093959a4d5d6682b881beaa735b
3
+ size 24230799
sd-highcwu_v1-model-lora/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:216d3c72023c3baaf47f12a1d916eeac3405829d04c31c9a5b594ba003cfcc7d
3
+ size 3283841
sd-highcwu_v1-model-lora/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13669e40d4ce1bbc2a0a5c0b1599f7cd1a28d6e89ff7f432e89d9cbc0294e7cb
3
+ size 3227303
sd-mpii-pose-model-control-lora/config.json ADDED
@@ -0,0 +1,92 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_class_name": "ControlLoRA",
3
+ "_diffusers_version": "0.13.0.dev0",
4
+ "act_fn": "silu",
5
+ "block_out_channels": [
6
+ 32,
7
+ 64,
8
+ 128,
9
+ 256
10
+ ],
11
+ "down_block_types": [
12
+ "SimpleDownEncoderBlock2D",
13
+ "SimpleDownEncoderBlock2D",
14
+ "SimpleDownEncoderBlock2D",
15
+ "SimpleDownEncoderBlock2D"
16
+ ],
17
+ "in_channels": 3,
18
+ "layers_per_block": 1,
19
+ "lora_block_in_channels": [
20
+ 256,
21
+ 256,
22
+ 256,
23
+ 256
24
+ ],
25
+ "lora_block_out_channels": [
26
+ 320,
27
+ 640,
28
+ 1280,
29
+ 1280
30
+ ],
31
+ "lora_control_rank": null,
32
+ "lora_cross_attention_dims": [
33
+ [
34
+ null,
35
+ 768,
36
+ null,
37
+ 768,
38
+ null,
39
+ 768,
40
+ null,
41
+ 768,
42
+ null,
43
+ 768
44
+ ],
45
+ [
46
+ null,
47
+ 768,
48
+ null,
49
+ 768,
50
+ null,
51
+ 768,
52
+ null,
53
+ 768,
54
+ null,
55
+ 768
56
+ ],
57
+ [
58
+ null,
59
+ 768,
60
+ null,
61
+ 768,
62
+ null,
63
+ 768,
64
+ null,
65
+ 768,
66
+ null,
67
+ 768
68
+ ],
69
+ [
70
+ null,
71
+ 768
72
+ ]
73
+ ],
74
+ "lora_post_add": false,
75
+ "lora_pre_conv_layers_kernel_size": 1,
76
+ "lora_pre_conv_layers_per_block": 1,
77
+ "lora_pre_conv_types": [
78
+ "SimpleDownEncoderBlock2D",
79
+ "SimpleDownEncoderBlock2D",
80
+ "SimpleDownEncoderBlock2D",
81
+ "SimpleDownEncoderBlock2D"
82
+ ],
83
+ "lora_pre_down_block_types": [
84
+ null,
85
+ "SimpleDownEncoderBlock2D",
86
+ "SimpleDownEncoderBlock2D",
87
+ "SimpleDownEncoderBlock2D"
88
+ ],
89
+ "lora_pre_down_layers_per_block": 1,
90
+ "lora_rank": 4,
91
+ "norm_num_groups": 32
92
+ }
sd-mpii-pose-model-control-lora/diffusion_pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34dccce2d034031bd1a37fb763f2fddb0e10f164ed31c0487937f0b2c3e5c55b
3
+ size 24351137
sd-mpii-pose-model-control-lora/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59f313b024a89b7bd6ad04fba528523368bbcb9957f5fc8d640c509379da6c51
3
+ size 24230799