licyk commited on
Commit
930dbad
1 Parent(s): 47159be
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. cldm_v15.yaml +0 -79
  2. cldm_v21.yaml +0 -85
  3. control_sd15_canny.yaml +0 -79
  4. control_sd15_depth.yaml +0 -79
  5. control_sd15_hed.yaml +0 -79
  6. control_sd15_mlsd.yaml +0 -79
  7. control_sd15_normal.yaml +0 -79
  8. control_sd15_openpose.yaml +0 -79
  9. control_sd15_scribble.yaml +0 -79
  10. control_sd15_seg.yaml +0 -79
  11. control_v11e_sd15_ip2p.yaml +0 -79
  12. control_v11e_sd15_ip2p_fp16.yaml +0 -79
  13. control_v11e_sd15_shuffle.yaml +0 -80
  14. control_v11e_sd15_shuffle_fp16.yaml +0 -80
  15. control_v11f1e_sd15_tile.yaml +0 -79
  16. control_v11f1e_sd15_tile_fp16.yaml +0 -79
  17. control_v11f1p_sd15_depth.yaml +0 -79
  18. control_v11f1p_sd15_depth_fp16.yaml +0 -79
  19. control_v11p_sd15_canny.yaml +0 -79
  20. control_v11p_sd15_canny_fp16.yaml +0 -79
  21. control_v11p_sd15_inpaint.yaml +0 -79
  22. control_v11p_sd15_inpaint_fp16.yaml +0 -79
  23. control_v11p_sd15_lineart.yaml +0 -79
  24. control_v11p_sd15_lineart_fp16.yaml +0 -79
  25. control_v11p_sd15_mlsd.yaml +0 -79
  26. control_v11p_sd15_mlsd_fp16.yaml +0 -79
  27. control_v11p_sd15_normalbae.yaml +0 -79
  28. control_v11p_sd15_normalbae_fp16.yaml +0 -79
  29. control_v11p_sd15_openpose.yaml +0 -79
  30. control_v11p_sd15_openpose_fp16.yaml +0 -79
  31. control_v11p_sd15_scribble.yaml +0 -79
  32. control_v11p_sd15_scribble_fp16.yaml +0 -79
  33. control_v11p_sd15_seg.yaml +0 -79
  34. control_v11p_sd15_seg_fp16.yaml +0 -79
  35. control_v11p_sd15_softedge.yaml +0 -79
  36. control_v11p_sd15_softedge_fp16.yaml +0 -79
  37. control_v11p_sd15s2_lineart_anime.yaml +0 -79
  38. control_v11p_sd15s2_lineart_anime_fp16.yaml +0 -79
  39. control_v1p_sd15_qrcode_monster.yaml +0 -80
  40. image_adapter_v14.yaml +0 -9
  41. sketch_adapter_v14.yaml +0 -9
  42. t2iadapter_canny_sd14v1.yaml +0 -9
  43. t2iadapter_canny_sd15v2.yaml +0 -9
  44. t2iadapter_color_sd14v1.yaml +0 -6
  45. t2iadapter_depth_sd14v1.yaml +0 -9
  46. t2iadapter_depth_sd15v2.yaml +0 -9
  47. t2iadapter_keypose_sd14v1.yaml +0 -9
  48. t2iadapter_openpose_sd14v1.yaml +0 -9
  49. t2iadapter_seg_sd14v1.yaml +0 -9
  50. t2iadapter_sketch_sd14v1.yaml +0 -9
cldm_v15.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cldm_v21.yaml DELETED
@@ -1,85 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- use_checkpoint: True
25
- image_size: 32 # unused
26
- in_channels: 4
27
- hint_channels: 3
28
- model_channels: 320
29
- attention_resolutions: [ 4, 2, 1 ]
30
- num_res_blocks: 2
31
- channel_mult: [ 1, 2, 4, 4 ]
32
- num_head_channels: 64 # need to fix for flash-attn
33
- use_spatial_transformer: True
34
- use_linear_in_transformer: True
35
- transformer_depth: 1
36
- context_dim: 1024
37
- legacy: False
38
-
39
- unet_config:
40
- target: cldm.cldm.ControlledUnetModel
41
- params:
42
- use_checkpoint: True
43
- image_size: 32 # unused
44
- in_channels: 4
45
- out_channels: 4
46
- model_channels: 320
47
- attention_resolutions: [ 4, 2, 1 ]
48
- num_res_blocks: 2
49
- channel_mult: [ 1, 2, 4, 4 ]
50
- num_head_channels: 64 # need to fix for flash-attn
51
- use_spatial_transformer: True
52
- use_linear_in_transformer: True
53
- transformer_depth: 1
54
- context_dim: 1024
55
- legacy: False
56
-
57
- first_stage_config:
58
- target: ldm.models.autoencoder.AutoencoderKL
59
- params:
60
- embed_dim: 4
61
- monitor: val/rec_loss
62
- ddconfig:
63
- #attn_type: "vanilla-xformers"
64
- double_z: true
65
- z_channels: 4
66
- resolution: 256
67
- in_channels: 3
68
- out_ch: 3
69
- ch: 128
70
- ch_mult:
71
- - 1
72
- - 2
73
- - 4
74
- - 4
75
- num_res_blocks: 2
76
- attn_resolutions: []
77
- dropout: 0.0
78
- lossconfig:
79
- target: torch.nn.Identity
80
-
81
- cond_stage_config:
82
- target: ldm.modules.encoders.modules.FrozenOpenCLIPEmbedder
83
- params:
84
- freeze: True
85
- layer: "penultimate"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_canny.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_depth.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_hed.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_mlsd.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_normal.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_openpose.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_scribble.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_sd15_seg.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11e_sd15_ip2p.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11e_sd15_ip2p_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11e_sd15_shuffle.yaml DELETED
@@ -1,80 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
- global_average_pooling: True
21
-
22
- control_stage_config:
23
- target: cldm.cldm.ControlNet
24
- params:
25
- image_size: 32 # unused
26
- in_channels: 4
27
- hint_channels: 3
28
- model_channels: 320
29
- attention_resolutions: [ 4, 2, 1 ]
30
- num_res_blocks: 2
31
- channel_mult: [ 1, 2, 4, 4 ]
32
- num_heads: 8
33
- use_spatial_transformer: True
34
- transformer_depth: 1
35
- context_dim: 768
36
- use_checkpoint: True
37
- legacy: False
38
-
39
- unet_config:
40
- target: cldm.cldm.ControlledUnetModel
41
- params:
42
- image_size: 32 # unused
43
- in_channels: 4
44
- out_channels: 4
45
- model_channels: 320
46
- attention_resolutions: [ 4, 2, 1 ]
47
- num_res_blocks: 2
48
- channel_mult: [ 1, 2, 4, 4 ]
49
- num_heads: 8
50
- use_spatial_transformer: True
51
- transformer_depth: 1
52
- context_dim: 768
53
- use_checkpoint: True
54
- legacy: False
55
-
56
- first_stage_config:
57
- target: ldm.models.autoencoder.AutoencoderKL
58
- params:
59
- embed_dim: 4
60
- monitor: val/rec_loss
61
- ddconfig:
62
- double_z: true
63
- z_channels: 4
64
- resolution: 256
65
- in_channels: 3
66
- out_ch: 3
67
- ch: 128
68
- ch_mult:
69
- - 1
70
- - 2
71
- - 4
72
- - 4
73
- num_res_blocks: 2
74
- attn_resolutions: []
75
- dropout: 0.0
76
- lossconfig:
77
- target: torch.nn.Identity
78
-
79
- cond_stage_config:
80
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11e_sd15_shuffle_fp16.yaml DELETED
@@ -1,80 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
- global_average_pooling: True
21
-
22
- control_stage_config:
23
- target: cldm.cldm.ControlNet
24
- params:
25
- image_size: 32 # unused
26
- in_channels: 4
27
- hint_channels: 3
28
- model_channels: 320
29
- attention_resolutions: [ 4, 2, 1 ]
30
- num_res_blocks: 2
31
- channel_mult: [ 1, 2, 4, 4 ]
32
- num_heads: 8
33
- use_spatial_transformer: True
34
- transformer_depth: 1
35
- context_dim: 768
36
- use_checkpoint: True
37
- legacy: False
38
-
39
- unet_config:
40
- target: cldm.cldm.ControlledUnetModel
41
- params:
42
- image_size: 32 # unused
43
- in_channels: 4
44
- out_channels: 4
45
- model_channels: 320
46
- attention_resolutions: [ 4, 2, 1 ]
47
- num_res_blocks: 2
48
- channel_mult: [ 1, 2, 4, 4 ]
49
- num_heads: 8
50
- use_spatial_transformer: True
51
- transformer_depth: 1
52
- context_dim: 768
53
- use_checkpoint: True
54
- legacy: False
55
-
56
- first_stage_config:
57
- target: ldm.models.autoencoder.AutoencoderKL
58
- params:
59
- embed_dim: 4
60
- monitor: val/rec_loss
61
- ddconfig:
62
- double_z: true
63
- z_channels: 4
64
- resolution: 256
65
- in_channels: 3
66
- out_ch: 3
67
- ch: 128
68
- ch_mult:
69
- - 1
70
- - 2
71
- - 4
72
- - 4
73
- num_res_blocks: 2
74
- attn_resolutions: []
75
- dropout: 0.0
76
- lossconfig:
77
- target: torch.nn.Identity
78
-
79
- cond_stage_config:
80
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11f1e_sd15_tile.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11f1e_sd15_tile_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11f1p_sd15_depth.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11f1p_sd15_depth_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_canny.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_canny_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_inpaint.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_inpaint_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_lineart.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_lineart_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_mlsd.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_mlsd_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_normalbae.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_normalbae_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_openpose.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_openpose_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_scribble.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_scribble_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_seg.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_seg_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_softedge.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15_softedge_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15s2_lineart_anime.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v11p_sd15s2_lineart_anime_fp16.yaml DELETED
@@ -1,79 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
control_v1p_sd15_qrcode_monster.yaml DELETED
@@ -1,80 +0,0 @@
1
- model:
2
- target: cldm.cldm.ControlLDM
3
- params:
4
- linear_start: 0.00085
5
- linear_end: 0.0120
6
- num_timesteps_cond: 1
7
- log_every_t: 200
8
- timesteps: 1000
9
- first_stage_key: "jpg"
10
- cond_stage_key: "txt"
11
- control_key: "hint"
12
- image_size: 64
13
- channels: 4
14
- cond_stage_trainable: false
15
- conditioning_key: crossattn
16
- monitor: val/loss_simple_ema
17
- scale_factor: 0.18215
18
- use_ema: False
19
- only_mid_control: False
20
-
21
- control_stage_config:
22
- target: cldm.cldm.ControlNet
23
- params:
24
- image_size: 32 # unused
25
- in_channels: 4
26
- hint_channels: 3
27
- model_channels: 320
28
- attention_resolutions: [ 4, 2, 1 ]
29
- num_res_blocks: 2
30
- channel_mult: [ 1, 2, 4, 4 ]
31
- num_heads: 8
32
- use_spatial_transformer: True
33
- transformer_depth: 1
34
- context_dim: 768
35
- use_checkpoint: True
36
- legacy: False
37
-
38
- unet_config:
39
- target: cldm.cldm.ControlledUnetModel
40
- params:
41
- image_size: 32 # unused
42
- in_channels: 4
43
- out_channels: 4
44
- model_channels: 320
45
- attention_resolutions: [ 4, 2, 1 ]
46
- num_res_blocks: 2
47
- channel_mult: [ 1, 2, 4, 4 ]
48
- num_heads: 8
49
- use_spatial_transformer: True
50
- transformer_depth: 1
51
- context_dim: 768
52
- use_checkpoint: True
53
- legacy: False
54
-
55
- first_stage_config:
56
- target: ldm.models.autoencoder.AutoencoderKL
57
- params:
58
- embed_dim: 4
59
- monitor: val/rec_loss
60
- ddconfig:
61
- double_z: true
62
- z_channels: 4
63
- resolution: 256
64
- in_channels: 3
65
- out_ch: 3
66
- ch: 128
67
- ch_mult:
68
- - 1
69
- - 2
70
- - 4
71
- - 4
72
- num_res_blocks: 2
73
- attn_resolutions: []
74
- dropout: 0.0
75
- lossconfig:
76
- target: torch.nn.Identity
77
-
78
- cond_stage_config:
79
- target: ldm.modules.encoders.modules.FrozenCLIPEmbedder
80
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
image_adapter_v14.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 192
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
sketch_adapter_v14.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 64
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_canny_sd14v1.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 64
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_canny_sd15v2.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 64
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_color_sd14v1.yaml DELETED
@@ -1,6 +0,0 @@
1
- model:
2
- target: scripts.adapter.Adapter_light
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 4
6
- cin: 192
 
 
 
 
 
 
 
t2iadapter_depth_sd14v1.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 192
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_depth_sd15v2.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 192
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_keypose_sd14v1.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 192
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_openpose_sd14v1.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 192
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_seg_sd14v1.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 192
9
- use_conv: false
 
 
 
 
 
 
 
 
 
 
t2iadapter_sketch_sd14v1.yaml DELETED
@@ -1,9 +0,0 @@
1
- model:
2
- target: tencentarc.t21_adapter
3
- params:
4
- channels: [320, 640, 1280, 1280]
5
- nums_rb: 2
6
- ksize: 1
7
- sk: true
8
- cin: 64
9
- use_conv: false